LLVM 9.0.1
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/Metadata.h"
59#include "llvm/Support/Debug.h"
67#include <algorithm>
68#include <cassert>
69#include <cstdint>
70#include <functional>
71#include <iterator>
72#include <string>
73#include <tuple>
74#include <utility>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "dagcombine"
79
80STATISTIC(NodesCombined , "Number of dag nodes combined");
81STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85STATISTIC(SlicedLoads, "Number of load sliced");
86STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88static cl::opt<bool>
89CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92static cl::opt<bool>
93UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
95
96#ifndef NDEBUG
98CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
100 " function"));
101#endif
102
103/// Hidden option to stress test load slicing, i.e., when this option
104/// is enabled, load slicing bypasses most of its profitability guards.
105static cl::opt<bool>
106StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
108 cl::init(false));
109
110static cl::opt<bool>
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
113
115 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
116 cl::desc("Limit the number of operands to inline for Token Factors"));
117
118namespace {
119
120 class DAGCombiner {
121 SelectionDAG &DAG;
122 const TargetLowering &TLI;
123 CombineLevel Level;
124 CodeGenOpt::Level OptLevel;
125 bool LegalOperations = false;
126 bool LegalTypes = false;
127 bool ForCodeSize;
128
129 /// Worklist of all of the nodes that need to be simplified.
130 ///
131 /// This must behave as a stack -- new nodes to process are pushed onto the
132 /// back and when processing we pop off of the back.
133 ///
134 /// The worklist will not contain duplicates but may contain null entries
135 /// due to nodes being deleted from the underlying DAG.
137
138 /// Mapping from an SDNode to its position on the worklist.
139 ///
140 /// This is used to find and remove nodes from the worklist (by nulling
141 /// them) when they are deleted from the underlying DAG. It relies on
142 /// stable indices of nodes within the worklist.
144 /// This records all nodes attempted to add to the worklist since we
145 /// considered a new worklist entry. As we do not add duplicate nodes
146 /// in the worklist, this is different from the tail of the worklist.
148
149 /// Set of nodes which have been combined (at least once).
150 ///
151 /// This is used to allow us to reliably add any operands of a DAG node
152 /// which have not yet been combined to the worklist.
153 SmallPtrSet<SDNode *, 32> CombinedNodes;
154
155 // AA - Used for DAG load/store alias analysis.
156 AliasAnalysis *AA;
157
158 /// When an instruction is simplified, add all users of the instruction to
159 /// the work lists because they might get more simplified now.
160 void AddUsersToWorklist(SDNode *N) {
161 for (SDNode *Node : N->uses())
162 AddToWorklist(Node);
163 }
164
165 // Prune potentially dangling nodes. This is called after
166 // any visit to a node, but should also be called during a visit after any
167 // failed combine which may have created a DAG node.
168 void clearAddedDanglingWorklistEntries() {
169 // Check any nodes added to the worklist to see if they are prunable.
170 while (!PruningList.empty()) {
171 auto *N = PruningList.pop_back_val();
172 if (N->use_empty())
173 recursivelyDeleteUnusedNodes(N);
174 }
175 }
176
177 SDNode *getNextWorklistEntry() {
178 // Before we do any work, remove nodes that are not in use.
179 clearAddedDanglingWorklistEntries();
180 SDNode *N = nullptr;
181 // The Worklist holds the SDNodes in order, but it may contain null
182 // entries.
183 while (!N && !Worklist.empty()) {
184 N = Worklist.pop_back_val();
185 }
186
187 if (N) {
188 bool GoodWorklistEntry = WorklistMap.erase(N);
189 (void)GoodWorklistEntry;
190 assert(GoodWorklistEntry &&
191 "Found a worklist entry without a corresponding map entry!");
192 }
193 return N;
194 }
195
196 /// Call the node-specific routine that folds each particular type of node.
197 SDValue visit(SDNode *N);
198
199 public:
200 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
201 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
202 OptLevel(OL), AA(AA) {
203 ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
204
205 MaximumLegalStoreInBits = 0;
206 for (MVT VT : MVT::all_valuetypes())
207 if (EVT(VT).isSimple() && VT != MVT::Other &&
208 TLI.isTypeLegal(EVT(VT)) &&
209 VT.getSizeInBits() >= MaximumLegalStoreInBits)
210 MaximumLegalStoreInBits = VT.getSizeInBits();
211 }
212
213 void ConsiderForPruning(SDNode *N) {
214 // Mark this for potential pruning.
215 PruningList.insert(N);
216 }
217
218 /// Add to the worklist making sure its instance is at the back (next to be
219 /// processed.)
220 void AddToWorklist(SDNode *N) {
221 assert(N->getOpcode() != ISD::DELETED_NODE &&
222 "Deleted Node added to Worklist");
223
224 // Skip handle nodes as they can't usefully be combined and confuse the
225 // zero-use deletion strategy.
226 if (N->getOpcode() == ISD::HANDLENODE)
227 return;
228
229 ConsiderForPruning(N);
230
231 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
232 Worklist.push_back(N);
233 }
234
235 /// Remove all instances of N from the worklist.
236 void removeFromWorklist(SDNode *N) {
237 CombinedNodes.erase(N);
238 PruningList.remove(N);
239
240 auto It = WorklistMap.find(N);
241 if (It == WorklistMap.end())
242 return; // Not in the worklist.
243
244 // Null out the entry rather than erasing it to avoid a linear operation.
245 Worklist[It->second] = nullptr;
246 WorklistMap.erase(It);
247 }
248
249 void deleteAndRecombine(SDNode *N);
250 bool recursivelyDeleteUnusedNodes(SDNode *N);
251
252 /// Replaces all uses of the results of one DAG node with new values.
253 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
254 bool AddTo = true);
255
256 /// Replaces all uses of the results of one DAG node with new values.
257 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
258 return CombineTo(N, &Res, 1, AddTo);
259 }
260
261 /// Replaces all uses of the results of one DAG node with new values.
262 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
263 bool AddTo = true) {
264 SDValue To[] = { Res0, Res1 };
265 return CombineTo(N, To, 2, AddTo);
266 }
267
268 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
269
270 private:
271 unsigned MaximumLegalStoreInBits;
272
273 /// Check the specified integer node value to see if it can be simplified or
274 /// if things it uses can be simplified by bit propagation.
275 /// If so, return true.
276 bool SimplifyDemandedBits(SDValue Op) {
277 unsigned BitWidth = Op.getScalarValueSizeInBits();
279 return SimplifyDemandedBits(Op, DemandedBits);
280 }
281
282 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
283 EVT VT = Op.getValueType();
284 unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
285 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
286 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
287 }
288
289 /// Check the specified vector node value to see if it can be simplified or
290 /// if things it uses can be simplified as it only uses some of the
291 /// elements. If so, return true.
292 bool SimplifyDemandedVectorElts(SDValue Op) {
293 unsigned NumElts = Op.getValueType().getVectorNumElements();
294 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
295 return SimplifyDemandedVectorElts(Op, DemandedElts);
296 }
297
298 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
299 const APInt &DemandedElts);
300 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
301 bool AssumeSingleUse = false);
302
303 bool CombineToPreIndexedLoadStore(SDNode *N);
304 bool CombineToPostIndexedLoadStore(SDNode *N);
305 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
306 bool SliceUpLoad(SDNode *N);
307
308 // Scalars have size 0 to distinguish from singleton vectors.
309 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
310 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
311 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
312
313 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
314 /// load.
315 ///
316 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
317 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
318 /// \param EltNo index of the vector element to load.
319 /// \param OriginalLoad load that EVE came from to be replaced.
320 /// \returns EVE on success SDValue() on failure.
321 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
322 SDValue EltNo,
323 LoadSDNode *OriginalLoad);
324 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
325 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
326 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
327 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
328 SDValue PromoteIntBinOp(SDValue Op);
329 SDValue PromoteIntShiftOp(SDValue Op);
330 SDValue PromoteExtend(SDValue Op);
331 bool PromoteLoad(SDValue Op);
332
333 /// Call the node-specific routine that knows how to fold each
334 /// particular type of node. If that doesn't do anything, try the
335 /// target-specific DAG combines.
336 SDValue combine(SDNode *N);
337
338 // Visitation implementation - Implement dag node combining for different
339 // node types. The semantics are as follows:
340 // Return Value:
341 // SDValue.getNode() == 0 - No change was made
342 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
343 // otherwise - N should be replaced by the returned Operand.
344 //
345 SDValue visitTokenFactor(SDNode *N);
346 SDValue visitMERGE_VALUES(SDNode *N);
347 SDValue visitADD(SDNode *N);
348 SDValue visitADDLike(SDNode *N);
349 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
350 SDValue visitSUB(SDNode *N);
351 SDValue visitADDSAT(SDNode *N);
352 SDValue visitSUBSAT(SDNode *N);
353 SDValue visitADDC(SDNode *N);
354 SDValue visitADDO(SDNode *N);
355 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
356 SDValue visitSUBC(SDNode *N);
357 SDValue visitSUBO(SDNode *N);
358 SDValue visitADDE(SDNode *N);
359 SDValue visitADDCARRY(SDNode *N);
360 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
361 SDValue visitSUBE(SDNode *N);
362 SDValue visitSUBCARRY(SDNode *N);
363 SDValue visitMUL(SDNode *N);
364 SDValue useDivRem(SDNode *N);
365 SDValue visitSDIV(SDNode *N);
366 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
367 SDValue visitUDIV(SDNode *N);
368 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
369 SDValue visitREM(SDNode *N);
370 SDValue visitMULHU(SDNode *N);
371 SDValue visitMULHS(SDNode *N);
372 SDValue visitSMUL_LOHI(SDNode *N);
373 SDValue visitUMUL_LOHI(SDNode *N);
374 SDValue visitMULO(SDNode *N);
375 SDValue visitIMINMAX(SDNode *N);
376 SDValue visitAND(SDNode *N);
377 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
378 SDValue visitOR(SDNode *N);
379 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
380 SDValue visitXOR(SDNode *N);
381 SDValue SimplifyVBinOp(SDNode *N);
382 SDValue visitSHL(SDNode *N);
383 SDValue visitSRA(SDNode *N);
384 SDValue visitSRL(SDNode *N);
385 SDValue visitFunnelShift(SDNode *N);
386 SDValue visitRotate(SDNode *N);
387 SDValue visitABS(SDNode *N);
388 SDValue visitBSWAP(SDNode *N);
389 SDValue visitBITREVERSE(SDNode *N);
390 SDValue visitCTLZ(SDNode *N);
391 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
392 SDValue visitCTTZ(SDNode *N);
393 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
394 SDValue visitCTPOP(SDNode *N);
395 SDValue visitSELECT(SDNode *N);
396 SDValue visitVSELECT(SDNode *N);
397 SDValue visitSELECT_CC(SDNode *N);
398 SDValue visitSETCC(SDNode *N);
399 SDValue visitSETCCCARRY(SDNode *N);
400 SDValue visitSIGN_EXTEND(SDNode *N);
401 SDValue visitZERO_EXTEND(SDNode *N);
402 SDValue visitANY_EXTEND(SDNode *N);
403 SDValue visitAssertExt(SDNode *N);
404 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
405 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
406 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
407 SDValue visitTRUNCATE(SDNode *N);
408 SDValue visitBITCAST(SDNode *N);
409 SDValue visitBUILD_PAIR(SDNode *N);
410 SDValue visitFADD(SDNode *N);
411 SDValue visitFSUB(SDNode *N);
412 SDValue visitFMUL(SDNode *N);
413 SDValue visitFMA(SDNode *N);
414 SDValue visitFDIV(SDNode *N);
415 SDValue visitFREM(SDNode *N);
416 SDValue visitFSQRT(SDNode *N);
417 SDValue visitFCOPYSIGN(SDNode *N);
418 SDValue visitFPOW(SDNode *N);
419 SDValue visitSINT_TO_FP(SDNode *N);
420 SDValue visitUINT_TO_FP(SDNode *N);
421 SDValue visitFP_TO_SINT(SDNode *N);
422 SDValue visitFP_TO_UINT(SDNode *N);
423 SDValue visitFP_ROUND(SDNode *N);
424 SDValue visitFP_ROUND_INREG(SDNode *N);
425 SDValue visitFP_EXTEND(SDNode *N);
426 SDValue visitFNEG(SDNode *N);
427 SDValue visitFABS(SDNode *N);
428 SDValue visitFCEIL(SDNode *N);
429 SDValue visitFTRUNC(SDNode *N);
430 SDValue visitFFLOOR(SDNode *N);
431 SDValue visitFMINNUM(SDNode *N);
432 SDValue visitFMAXNUM(SDNode *N);
433 SDValue visitFMINIMUM(SDNode *N);
434 SDValue visitFMAXIMUM(SDNode *N);
435 SDValue visitBRCOND(SDNode *N);
436 SDValue visitBR_CC(SDNode *N);
437 SDValue visitLOAD(SDNode *N);
438
439 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
440 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
441
442 SDValue visitSTORE(SDNode *N);
443 SDValue visitLIFETIME_END(SDNode *N);
444 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
445 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
446 SDValue visitBUILD_VECTOR(SDNode *N);
447 SDValue visitCONCAT_VECTORS(SDNode *N);
448 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
449 SDValue visitVECTOR_SHUFFLE(SDNode *N);
450 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
451 SDValue visitINSERT_SUBVECTOR(SDNode *N);
452 SDValue visitMLOAD(SDNode *N);
453 SDValue visitMSTORE(SDNode *N);
454 SDValue visitMGATHER(SDNode *N);
455 SDValue visitMSCATTER(SDNode *N);
456 SDValue visitFP_TO_FP16(SDNode *N);
457 SDValue visitFP16_TO_FP(SDNode *N);
458 SDValue visitVECREDUCE(SDNode *N);
459
460 SDValue visitFADDForFMACombine(SDNode *N);
461 SDValue visitFSUBForFMACombine(SDNode *N);
462 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
463
464 SDValue XformToShuffleWithZero(SDNode *N);
465 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
466 const SDLoc &DL, SDValue N0,
467 SDValue N1);
468 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
469 SDValue N1);
470 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
471 SDValue N1, SDNodeFlags Flags);
472
473 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
474
475 SDValue foldSelectOfConstants(SDNode *N);
476 SDValue foldVSelectOfConstants(SDNode *N);
477 SDValue foldBinOpIntoSelect(SDNode *BO);
478 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
479 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
480 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
481 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
482 SDValue N2, SDValue N3, ISD::CondCode CC,
483 bool NotExtCompare = false);
484 SDValue convertSelectOfFPConstantsToLoadOffset(
485 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
486 ISD::CondCode CC);
487 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
488 SDValue N2, SDValue N3, ISD::CondCode CC);
489 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
490 const SDLoc &DL);
491 SDValue unfoldMaskedMerge(SDNode *N);
492 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
493 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
494 const SDLoc &DL, bool foldBooleans);
495 SDValue rebuildSetCC(SDValue N);
496
497 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
498 SDValue &CC) const;
499 bool isOneUseSetCC(SDValue N) const;
500
501 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
502 unsigned HiOp);
503 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
504 SDValue CombineExtLoad(SDNode *N);
505 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
506 SDValue combineRepeatedFPDivisors(SDNode *N);
507 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
508 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
509 SDValue BuildSDIV(SDNode *N);
510 SDValue BuildSDIVPow2(SDNode *N);
511 SDValue BuildUDIV(SDNode *N);
512 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
513 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
514 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
515 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
516 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
517 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
518 SDNodeFlags Flags, bool Reciprocal);
519 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
520 SDNodeFlags Flags, bool Reciprocal);
521 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
522 bool DemandHighBits = true);
523 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
524 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
525 SDValue InnerPos, SDValue InnerNeg,
526 unsigned PosOpcode, unsigned NegOpcode,
527 const SDLoc &DL);
528 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
529 SDValue MatchLoadCombine(SDNode *N);
530 SDValue MatchStoreCombine(StoreSDNode *N);
531 SDValue ReduceLoadWidth(SDNode *N);
532 SDValue ReduceLoadOpStoreWidth(SDNode *N);
534 SDValue TransformFPLoadStorePair(SDNode *N);
535 SDValue convertBuildVecZextToZext(SDNode *N);
536 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
537 SDValue reduceBuildVecToShuffle(SDNode *N);
538 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
539 ArrayRef<int> VectorMask, SDValue VecIn1,
540 SDValue VecIn2, unsigned LeftIdx,
541 bool DidSplitVec);
542 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
543
544 /// Walk up chain skipping non-aliasing memory nodes,
545 /// looking for aliasing nodes and adding them to the Aliases vector.
546 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
547 SmallVectorImpl<SDValue> &Aliases);
548
549 /// Return true if there is any possibility that the two addresses overlap.
550 bool isAlias(SDNode *Op0, SDNode *Op1) const;
551
552 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
553 /// chain (aliasing node.)
554 SDValue FindBetterChain(SDNode *N, SDValue Chain);
555
556 /// Try to replace a store and any possibly adjacent stores on
557 /// consecutive chains with better chains. Return true only if St is
558 /// replaced.
559 ///
560 /// Notice that other chains may still be replaced even if the function
561 /// returns false.
562 bool findBetterNeighborChains(StoreSDNode *St);
563
564 // Helper for findBetterNeighborChains. Walk up store chain add additional
565 // chained stores that do not overlap and can be parallelized.
566 bool parallelizeChainedStores(StoreSDNode *St);
567
568 /// Holds a pointer to an LSBaseSDNode as well as information on where it
569 /// is located in a sequence of memory operations connected by a chain.
570 struct MemOpLink {
571 // Ptr to the mem node.
572 LSBaseSDNode *MemNode;
573
574 // Offset from the base ptr.
575 int64_t OffsetFromBase;
576
577 MemOpLink(LSBaseSDNode *N, int64_t Offset)
578 : MemNode(N), OffsetFromBase(Offset) {}
579 };
580
581 /// This is a helper function for visitMUL to check the profitability
582 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
583 /// MulNode is the original multiply, AddNode is (add x, c1),
584 /// and ConstNode is c2.
585 bool isMulAddWithConstProfitable(SDNode *MulNode,
586 SDValue &AddNode,
587 SDValue &ConstNode);
588
589 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
590 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
591 /// the type of the loaded value to be extended.
592 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
593 EVT LoadResultTy, EVT &ExtVT);
594
595 /// Helper function to calculate whether the given Load/Store can have its
596 /// width reduced to ExtVT.
597 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
598 EVT &MemVT, unsigned ShAmt = 0);
599
600 /// Used by BackwardsPropagateMask to find suitable loads.
601 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
602 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
603 ConstantSDNode *Mask, SDNode *&NodeToMask);
604 /// Attempt to propagate a given AND node back to load leaves so that they
605 /// can be combined into narrow loads.
606 bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
607
608 /// Helper function for MergeConsecutiveStores which merges the
609 /// component store chains.
610 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
611 unsigned NumStores);
612
613 /// This is a helper function for MergeConsecutiveStores. When the
614 /// source elements of the consecutive stores are all constants or
615 /// all extracted vector elements, try to merge them into one
616 /// larger store introducing bitcasts if necessary. \return True
617 /// if a merged store was created.
618 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
619 EVT MemVT, unsigned NumStores,
620 bool IsConstantSrc, bool UseVector,
621 bool UseTrunc);
622
623 /// This is a helper function for MergeConsecutiveStores. Stores
624 /// that potentially may be merged with St are placed in
625 /// StoreNodes. RootNode is a chain predecessor to all store
626 /// candidates.
627 void getStoreMergeCandidates(StoreSDNode *St,
628 SmallVectorImpl<MemOpLink> &StoreNodes,
629 SDNode *&Root);
630
631 /// Helper function for MergeConsecutiveStores. Checks if
632 /// candidate stores have indirect dependency through their
633 /// operands. RootNode is the predecessor to all stores calculated
634 /// by getStoreMergeCandidates and is used to prune the dependency check.
635 /// \return True if safe to merge.
636 bool checkMergeStoreCandidatesForDependencies(
637 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
638 SDNode *RootNode);
639
640 /// Merge consecutive store operations into a wide store.
641 /// This optimization uses wide integers or vectors when possible.
642 /// \return true if stores were merged into a wide store.
644 bool MergeConsecutiveStores(StoreSDNode *St);
645
646 /// Try to transform a truncation where C is a constant:
647 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
648 ///
649 /// \p N needs to be a truncation and its first operand an AND. Other
650 /// requirements are checked by the function (e.g. that trunc is
651 /// single-use) and if missed an empty SDValue is returned.
652 SDValue distributeTruncateThroughAnd(SDNode *N);
653
654 /// Helper function to determine whether the target supports operation
655 /// given by \p Opcode for type \p VT, that is, whether the operation
656 /// is legal or custom before legalizing operations, and whether is
657 /// legal (but not custom) after legalization.
658 bool hasOperation(unsigned Opcode, EVT VT) {
659 if (LegalOperations)
660 return TLI.isOperationLegal(Opcode, VT);
661 return TLI.isOperationLegalOrCustom(Opcode, VT);
662 }
663
664 public:
665 /// Runs the dag combiner on all nodes in the work list
666 void Run(CombineLevel AtLevel);
667
668 SelectionDAG &getDAG() const { return DAG; }
669
670 /// Returns a type large enough to hold any valid shift amount - before type
671 /// legalization these can be huge.
672 EVT getShiftAmountTy(EVT LHSTy) {
673 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
674 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
675 }
676
677 /// This method returns true if we are running before type legalization or
678 /// if the specified VT is legal.
679 bool isTypeLegal(const EVT &VT) {
680 if (!LegalTypes) return true;
681 return TLI.isTypeLegal(VT);
682 }
683
684 /// Convenience wrapper around TargetLowering::getSetCCResultType
685 EVT getSetCCResultType(EVT VT) const {
686 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
687 }
688
689 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
690 SDValue OrigLoad, SDValue ExtLoad,
691 ISD::NodeType ExtType);
692 };
693
694/// This class is a DAGUpdateListener that removes any deleted
695/// nodes from the worklist.
696class WorklistRemover : public SelectionDAG::DAGUpdateListener {
697 DAGCombiner &DC;
698
699public:
700 explicit WorklistRemover(DAGCombiner &dc)
701 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
702
703 void NodeDeleted(SDNode *N, SDNode *E) override {
704 DC.removeFromWorklist(N);
705 }
706};
707
708class WorklistInserter : public SelectionDAG::DAGUpdateListener {
709 DAGCombiner &DC;
710
711public:
712 explicit WorklistInserter(DAGCombiner &dc)
713 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
714
715 // FIXME: Ideally we could add N to the worklist, but this causes exponential
716 // compile time costs in large DAGs, e.g. Halide.
717 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
718};
719
720} // end anonymous namespace
721
722//===----------------------------------------------------------------------===//
723// TargetLowering::DAGCombinerInfo implementation
724//===----------------------------------------------------------------------===//
725
727 ((DAGCombiner*)DC)->AddToWorklist(N);
728}
729
731CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
732 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
733}
734
736CombineTo(SDNode *N, SDValue Res, bool AddTo) {
737 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
738}
739
741CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
742 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
743}
744
747 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
748}
749
750//===----------------------------------------------------------------------===//
751// Helper Functions
752//===----------------------------------------------------------------------===//
753
754void DAGCombiner::deleteAndRecombine(SDNode *N) {
755 removeFromWorklist(N);
756
757 // If the operands of this node are only used by the node, they will now be
758 // dead. Make sure to re-visit them and recursively delete dead nodes.
759 for (const SDValue &Op : N->ops())
760 // For an operand generating multiple values, one of the values may
761 // become dead allowing further simplification (e.g. split index
762 // arithmetic from an indexed load).
763 if (Op->hasOneUse() || Op->getNumValues() > 1)
764 AddToWorklist(Op.getNode());
765
766 DAG.DeleteNode(N);
767}
768
769/// Return 1 if we can compute the negated form of the specified expression for
770/// the same cost as the expression itself, or 2 if we can compute the negated
771/// form more cheaply than the expression itself.
772static char isNegatibleForFree(SDValue Op, bool LegalOperations,
773 const TargetLowering &TLI,
774 const TargetOptions *Options,
775 bool ForCodeSize,
776 unsigned Depth = 0) {
777 // fneg is removable even if it has multiple uses.
778 if (Op.getOpcode() == ISD::FNEG)
779 return 2;
780
781 // Don't allow anything with multiple uses unless we know it is free.
782 EVT VT = Op.getValueType();
783 const SDNodeFlags Flags = Op->getFlags();
784 if (!Op.hasOneUse() &&
785 !(Op.getOpcode() == ISD::FP_EXTEND &&
786 TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
787 return 0;
788
789 // Don't recurse exponentially.
790 if (Depth > 6)
791 return 0;
792
793 switch (Op.getOpcode()) {
794 default: return false;
795 case ISD::ConstantFP: {
796 if (!LegalOperations)
797 return 1;
798
799 // Don't invert constant FP values after legalization unless the target says
800 // the negated constant is legal.
801 return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
802 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
803 ForCodeSize);
804 }
805 case ISD::BUILD_VECTOR: {
806 // Only permit BUILD_VECTOR of constants.
807 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
808 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
809 }))
810 return 0;
811 if (!LegalOperations)
812 return 1;
813 if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
815 return 1;
816 return llvm::all_of(Op->op_values(), [&](SDValue N) {
817 return N.isUndef() ||
818 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
819 ForCodeSize);
820 });
821 }
822 case ISD::FADD:
823 if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
824 return 0;
825
826 // After operation legalization, it might not be legal to create new FSUBs.
827 if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
828 return 0;
829
830 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
831 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
832 Options, ForCodeSize, Depth + 1))
833 return V;
834 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
835 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
836 ForCodeSize, Depth + 1);
837 case ISD::FSUB:
838 // We can't turn -(A-B) into B-A when we honor signed zeros.
839 if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
840 return 0;
841
842 // fold (fneg (fsub A, B)) -> (fsub B, A)
843 return 1;
844
845 case ISD::FMUL:
846 case ISD::FDIV:
847 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
848 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
849 Options, ForCodeSize, Depth + 1))
850 return V;
851
852 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
853 ForCodeSize, Depth + 1);
854
855 case ISD::FP_EXTEND:
856 case ISD::FP_ROUND:
857 case ISD::FSIN:
858 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
859 ForCodeSize, Depth + 1);
860 }
861}
862
863/// If isNegatibleForFree returns true, return the newly negated expression.
865 bool LegalOperations, bool ForCodeSize,
866 unsigned Depth = 0) {
867 // fneg is removable even if it has multiple uses.
868 if (Op.getOpcode() == ISD::FNEG)
869 return Op.getOperand(0);
870
871 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
872 const TargetOptions &Options = DAG.getTarget().Options;
873 const SDNodeFlags Flags = Op->getFlags();
874
875 switch (Op.getOpcode()) {
876 default: llvm_unreachable("Unknown code");
877 case ISD::ConstantFP: {
878 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
879 V.changeSign();
880 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
881 }
882 case ISD::BUILD_VECTOR: {
884 for (SDValue C : Op->op_values()) {
885 if (C.isUndef()) {
886 Ops.push_back(C);
887 continue;
888 }
889 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
890 V.changeSign();
891 Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
892 }
893 return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
894 }
895 case ISD::FADD:
896 assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
897
898 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
899 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
900 DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
901 Depth + 1))
902 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
903 GetNegatedExpression(Op.getOperand(0), DAG,
904 LegalOperations, ForCodeSize,
905 Depth + 1),
906 Op.getOperand(1), Flags);
907 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
908 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
909 GetNegatedExpression(Op.getOperand(1), DAG,
910 LegalOperations, ForCodeSize,
911 Depth + 1),
912 Op.getOperand(0), Flags);
913 case ISD::FSUB:
914 // fold (fneg (fsub 0, B)) -> B
915 if (ConstantFPSDNode *N0CFP =
916 isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
917 if (N0CFP->isZero())
918 return Op.getOperand(1);
919
920 // fold (fneg (fsub A, B)) -> (fsub B, A)
921 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
922 Op.getOperand(1), Op.getOperand(0), Flags);
923
924 case ISD::FMUL:
925 case ISD::FDIV:
926 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
927 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
928 DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
929 Depth + 1))
930 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
931 GetNegatedExpression(Op.getOperand(0), DAG,
932 LegalOperations, ForCodeSize,
933 Depth + 1),
934 Op.getOperand(1), Flags);
935
936 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
937 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
938 Op.getOperand(0),
939 GetNegatedExpression(Op.getOperand(1), DAG,
940 LegalOperations, ForCodeSize,
941 Depth + 1), Flags);
942
943 case ISD::FP_EXTEND:
944 case ISD::FSIN:
945 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
946 GetNegatedExpression(Op.getOperand(0), DAG,
947 LegalOperations, ForCodeSize,
948 Depth + 1));
949 case ISD::FP_ROUND:
950 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
951 GetNegatedExpression(Op.getOperand(0), DAG,
952 LegalOperations, ForCodeSize,
953 Depth + 1),
954 Op.getOperand(1));
955 }
956}
957
958// APInts must be the same size for most operations, this helper
959// function zero extends the shorter of the pair so that they match.
960// We provide an Offset so that we can create bitwidths that won't overflow.
961static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
962 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
963 LHS = LHS.zextOrSelf(Bits);
964 RHS = RHS.zextOrSelf(Bits);
965}
966
967// Return true if this node is a setcc, or is a select_cc
968// that selects between the target values used for true and false, making it
969// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
970// the appropriate nodes based on the type of node we are checking. This
971// simplifies life a bit for the callers.
972bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
973 SDValue &CC) const {
974 if (N.getOpcode() == ISD::SETCC) {
975 LHS = N.getOperand(0);
976 RHS = N.getOperand(1);
977 CC = N.getOperand(2);
978 return true;
979 }
980
981 if (N.getOpcode() != ISD::SELECT_CC ||
982 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
983 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
984 return false;
985
986 if (TLI.getBooleanContents(N.getValueType()) ==
988 return false;
989
990 LHS = N.getOperand(0);
991 RHS = N.getOperand(1);
992 CC = N.getOperand(4);
993 return true;
994}
995
996/// Return true if this is a SetCC-equivalent operation with only one use.
997/// If this is true, it allows the users to invert the operation for free when
998/// it is profitable to do so.
999bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000 SDValue N0, N1, N2;
1001 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
1002 return true;
1003 return false;
1004}
1005
1006// Returns the SDNode if it is a constant float BuildVector
1007// or constant float.
1009 if (isa<ConstantFPSDNode>(N))
1010 return N.getNode();
1012 return N.getNode();
1013 return nullptr;
1014}
1015
1016// Determines if it is a constant integer or a build vector of constant
1017// integers (and undefs).
1018// Do not permit build vector implicit truncation.
1019static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1020 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1021 return !(Const->isOpaque() && NoOpaques);
1022 if (N.getOpcode() != ISD::BUILD_VECTOR)
1023 return false;
1024 unsigned BitWidth = N.getScalarValueSizeInBits();
1025 for (const SDValue &Op : N->op_values()) {
1026 if (Op.isUndef())
1027 continue;
1028 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1029 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1030 (Const->isOpaque() && NoOpaques))
1031 return false;
1032 }
1033 return true;
1034}
1035
1036// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1037// undef's.
1038static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1039 if (V.getOpcode() != ISD::BUILD_VECTOR)
1040 return false;
1041 return isConstantOrConstantVector(V, NoOpaques) ||
1043}
1044
1045bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1046 const SDLoc &DL,
1047 SDValue N0,
1048 SDValue N1) {
1049 // Currently this only tries to ensure we don't undo the GEP splits done by
1050 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1051 // we check if the following transformation would be problematic:
1052 // (load/store (add, (add, x, offset1), offset2)) ->
1053 // (load/store (add, x, offset1+offset2)).
1054
1055 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1056 return false;
1057
1058 if (N0.hasOneUse())
1059 return false;
1060
1061 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1062 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1063 if (!C1 || !C2)
1064 return false;
1065
1066 const APInt &C1APIntVal = C1->getAPIntValue();
1067 const APInt &C2APIntVal = C2->getAPIntValue();
1068 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1069 return false;
1070
1071 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1072 if (CombinedValueIntVal.getBitWidth() > 64)
1073 return false;
1074 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1075
1076 for (SDNode *Node : N0->uses()) {
1077 auto LoadStore = dyn_cast<MemSDNode>(Node);
1078 if (LoadStore) {
1079 // Is x[offset2] already not a legal addressing mode? If so then
1080 // reassociating the constants breaks nothing (we test offset2 because
1081 // that's the one we hope to fold into the load or store).
1083 AM.HasBaseReg = true;
1084 AM.BaseOffs = C2APIntVal.getSExtValue();
1085 EVT VT = LoadStore->getMemoryVT();
1086 unsigned AS = LoadStore->getAddressSpace();
1087 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1088 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1089 continue;
1090
1091 // Would x[offset1+offset2] still be a legal addressing mode?
1092 AM.BaseOffs = CombinedValue;
1093 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1094 return true;
1095 }
1096 }
1097
1098 return false;
1099}
1100
1101// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1104 SDValue N0, SDValue N1) {
1105 EVT VT = N0.getValueType();
1106
1107 if (N0.getOpcode() != Opc)
1108 return SDValue();
1109
1110 // Don't reassociate reductions.
1111 if (N0->getFlags().hasVectorReduction())
1112 return SDValue();
1113
1116 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1117 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1118 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1119 return SDValue();
1120 }
1121 if (N0.hasOneUse()) {
1122 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1123 // iff (op x, c1) has one use
1124 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1125 if (!OpNode.getNode())
1126 return SDValue();
1127 AddToWorklist(OpNode.getNode());
1128 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1129 }
1130 }
1131 return SDValue();
1132}
1133
1134// Try to reassociate commutative binops.
1135SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1136 SDValue N1, SDNodeFlags Flags) {
1137 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1138 // Don't reassociate reductions.
1139 if (Flags.hasVectorReduction())
1140 return SDValue();
1141
1142 // Floating-point reassociation is not allowed without loose FP math.
1143 if (N0.getValueType().isFloatingPoint() ||
1145 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1146 return SDValue();
1147
1148 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1149 return Combined;
1150 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1151 return Combined;
1152 return SDValue();
1153}
1154
1155SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1156 bool AddTo) {
1157 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1158 ++NodesCombined;
1159 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1160 To[0].getNode()->dump(&DAG);
1161 dbgs() << " and " << NumTo - 1 << " other values\n");
1162 for (unsigned i = 0, e = NumTo; i != e; ++i)
1163 assert((!To[i].getNode() ||
1164 N->getValueType(i) == To[i].getValueType()) &&
1165 "Cannot combine value to value of different type!");
1166
1167 WorklistRemover DeadNodes(*this);
1168 DAG.ReplaceAllUsesWith(N, To);
1169 if (AddTo) {
1170 // Push the new nodes and any users onto the worklist
1171 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1172 if (To[i].getNode()) {
1173 AddToWorklist(To[i].getNode());
1174 AddUsersToWorklist(To[i].getNode());
1175 }
1176 }
1177 }
1178
1179 // Finally, if the node is now dead, remove it from the graph. The node
1180 // may not be dead if the replacement process recursively simplified to
1181 // something else needing this node.
1182 if (N->use_empty())
1183 deleteAndRecombine(N);
1184 return SDValue(N, 0);
1185}
1186
1187void DAGCombiner::
1189 // Replace all uses. If any nodes become isomorphic to other nodes and
1190 // are deleted, make sure to remove them from our worklist.
1191 WorklistRemover DeadNodes(*this);
1192 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1193
1194 // Push the new node and any (possibly new) users onto the worklist.
1195 AddToWorklist(TLO.New.getNode());
1196 AddUsersToWorklist(TLO.New.getNode());
1197
1198 // Finally, if the node is now dead, remove it from the graph. The node
1199 // may not be dead if the replacement process recursively simplified to
1200 // something else needing this node.
1201 if (TLO.Old.getNode()->use_empty())
1202 deleteAndRecombine(TLO.Old.getNode());
1203}
1204
1205/// Check the specified integer node value to see if it can be simplified or if
1206/// things it uses can be simplified by bit propagation. If so, return true.
1208 const APInt &DemandedElts) {
1209 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1210 KnownBits Known;
1211 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1212 return false;
1213
1214 // Revisit the node.
1215 AddToWorklist(Op.getNode());
1216
1217 // Replace the old value with the new one.
1218 ++NodesCombined;
1219 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1220 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1221 dbgs() << '\n');
1222
1223 CommitTargetLoweringOpt(TLO);
1224 return true;
1225}
1226
1227/// Check the specified vector node value to see if it can be simplified or
1228/// if things it uses can be simplified as it only uses some of the elements.
1229/// If so, return true.
1231 const APInt &DemandedElts,
1232 bool AssumeSingleUse) {
1233 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1234 APInt KnownUndef, KnownZero;
1235 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1236 TLO, 0, AssumeSingleUse))
1237 return false;
1238
1239 // Revisit the node.
1240 AddToWorklist(Op.getNode());
1241
1242 // Replace the old value with the new one.
1243 ++NodesCombined;
1244 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1245 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1246 dbgs() << '\n');
1247
1248 CommitTargetLoweringOpt(TLO);
1249 return true;
1250}
1251
1252void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1253 SDLoc DL(Load);
1254 EVT VT = Load->getValueType(0);
1255 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1256
1257 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1258 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1259 WorklistRemover DeadNodes(*this);
1260 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1261 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1262 deleteAndRecombine(Load);
1263 AddToWorklist(Trunc.getNode());
1264}
1265
1266SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1267 Replace = false;
1268 SDLoc DL(Op);
1269 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1270 LoadSDNode *LD = cast<LoadSDNode>(Op);
1271 EVT MemVT = LD->getMemoryVT();
1273 : LD->getExtensionType();
1274 Replace = true;
1275 return DAG.getExtLoad(ExtType, DL, PVT,
1276 LD->getChain(), LD->getBasePtr(),
1277 MemVT, LD->getMemOperand());
1278 }
1279
1280 unsigned Opc = Op.getOpcode();
1281 switch (Opc) {
1282 default: break;
1283 case ISD::AssertSext:
1284 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1285 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1286 break;
1287 case ISD::AssertZext:
1288 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1289 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1290 break;
1291 case ISD::Constant: {
1292 unsigned ExtOpc =
1293 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1294 return DAG.getNode(ExtOpc, DL, PVT, Op);
1295 }
1296 }
1297
1298 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1299 return SDValue();
1300 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1301}
1302
1303SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1305 return SDValue();
1306 EVT OldVT = Op.getValueType();
1307 SDLoc DL(Op);
1308 bool Replace = false;
1309 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1310 if (!NewOp.getNode())
1311 return SDValue();
1312 AddToWorklist(NewOp.getNode());
1313
1314 if (Replace)
1315 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1316 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1317 DAG.getValueType(OldVT));
1318}
1319
1320SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1321 EVT OldVT = Op.getValueType();
1322 SDLoc DL(Op);
1323 bool Replace = false;
1324 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1325 if (!NewOp.getNode())
1326 return SDValue();
1327 AddToWorklist(NewOp.getNode());
1328
1329 if (Replace)
1330 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1331 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1332}
1333
1334/// Promote the specified integer binary operation if the target indicates it is
1335/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1336/// i32 since i16 instructions are longer.
1337SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1338 if (!LegalOperations)
1339 return SDValue();
1340
1341 EVT VT = Op.getValueType();
1342 if (VT.isVector() || !VT.isInteger())
1343 return SDValue();
1344
1345 // If operation type is 'undesirable', e.g. i16 on x86, consider
1346 // promoting it.
1347 unsigned Opc = Op.getOpcode();
1348 if (TLI.isTypeDesirableForOp(Opc, VT))
1349 return SDValue();
1350
1351 EVT PVT = VT;
1352 // Consult target whether it is a good idea to promote this operation and
1353 // what's the right type to promote it to.
1354 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1355 assert(PVT != VT && "Don't know what type to promote to!");
1356
1357 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1358
1359 bool Replace0 = false;
1360 SDValue N0 = Op.getOperand(0);
1361 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1362
1363 bool Replace1 = false;
1364 SDValue N1 = Op.getOperand(1);
1365 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1366 SDLoc DL(Op);
1367
1368 SDValue RV =
1369 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1370
1371 // We are always replacing N0/N1's use in N and only need
1372 // additional replacements if there are additional uses.
1373 Replace0 &= !N0->hasOneUse();
1374 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1375
1376 // Combine Op here so it is preserved past replacements.
1377 CombineTo(Op.getNode(), RV);
1378
1379 // If operands have a use ordering, make sure we deal with
1380 // predecessor first.
1381 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1382 std::swap(N0, N1);
1383 std::swap(NN0, NN1);
1384 }
1385
1386 if (Replace0) {
1387 AddToWorklist(NN0.getNode());
1388 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1389 }
1390 if (Replace1) {
1391 AddToWorklist(NN1.getNode());
1392 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1393 }
1394 return Op;
1395 }
1396 return SDValue();
1397}
1398
1399/// Promote the specified integer shift operation if the target indicates it is
1400/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1401/// i32 since i16 instructions are longer.
1402SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1403 if (!LegalOperations)
1404 return SDValue();
1405
1406 EVT VT = Op.getValueType();
1407 if (VT.isVector() || !VT.isInteger())
1408 return SDValue();
1409
1410 // If operation type is 'undesirable', e.g. i16 on x86, consider
1411 // promoting it.
1412 unsigned Opc = Op.getOpcode();
1413 if (TLI.isTypeDesirableForOp(Opc, VT))
1414 return SDValue();
1415
1416 EVT PVT = VT;
1417 // Consult target whether it is a good idea to promote this operation and
1418 // what's the right type to promote it to.
1419 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1420 assert(PVT != VT && "Don't know what type to promote to!");
1421
1422 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1423
1424 bool Replace = false;
1425 SDValue N0 = Op.getOperand(0);
1426 SDValue N1 = Op.getOperand(1);
1427 if (Opc == ISD::SRA)
1428 N0 = SExtPromoteOperand(N0, PVT);
1429 else if (Opc == ISD::SRL)
1430 N0 = ZExtPromoteOperand(N0, PVT);
1431 else
1432 N0 = PromoteOperand(N0, PVT, Replace);
1433
1434 if (!N0.getNode())
1435 return SDValue();
1436
1437 SDLoc DL(Op);
1438 SDValue RV =
1439 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1440
1441 AddToWorklist(N0.getNode());
1442 if (Replace)
1443 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1444
1445 // Deal with Op being deleted.
1446 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1447 return RV;
1448 }
1449 return SDValue();
1450}
1451
1452SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1453 if (!LegalOperations)
1454 return SDValue();
1455
1456 EVT VT = Op.getValueType();
1457 if (VT.isVector() || !VT.isInteger())
1458 return SDValue();
1459
1460 // If operation type is 'undesirable', e.g. i16 on x86, consider
1461 // promoting it.
1462 unsigned Opc = Op.getOpcode();
1463 if (TLI.isTypeDesirableForOp(Opc, VT))
1464 return SDValue();
1465
1466 EVT PVT = VT;
1467 // Consult target whether it is a good idea to promote this operation and
1468 // what's the right type to promote it to.
1469 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1470 assert(PVT != VT && "Don't know what type to promote to!");
1471 // fold (aext (aext x)) -> (aext x)
1472 // fold (aext (zext x)) -> (zext x)
1473 // fold (aext (sext x)) -> (sext x)
1474 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1475 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1476 }
1477 return SDValue();
1478}
1479
1480bool DAGCombiner::PromoteLoad(SDValue Op) {
1481 if (!LegalOperations)
1482 return false;
1483
1484 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1485 return false;
1486
1487 EVT VT = Op.getValueType();
1488 if (VT.isVector() || !VT.isInteger())
1489 return false;
1490
1491 // If operation type is 'undesirable', e.g. i16 on x86, consider
1492 // promoting it.
1493 unsigned Opc = Op.getOpcode();
1494 if (TLI.isTypeDesirableForOp(Opc, VT))
1495 return false;
1496
1497 EVT PVT = VT;
1498 // Consult target whether it is a good idea to promote this operation and
1499 // what's the right type to promote it to.
1500 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1501 assert(PVT != VT && "Don't know what type to promote to!");
1502
1503 SDLoc DL(Op);
1504 SDNode *N = Op.getNode();
1505 LoadSDNode *LD = cast<LoadSDNode>(N);
1506 EVT MemVT = LD->getMemoryVT();
1508 : LD->getExtensionType();
1509 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1510 LD->getChain(), LD->getBasePtr(),
1511 MemVT, LD->getMemOperand());
1512 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1513
1514 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1515 Result.getNode()->dump(&DAG); dbgs() << '\n');
1516 WorklistRemover DeadNodes(*this);
1517 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1518 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1519 deleteAndRecombine(N);
1520 AddToWorklist(Result.getNode());
1521 return true;
1522 }
1523 return false;
1524}
1525
1526/// Recursively delete a node which has no uses and any operands for
1527/// which it is the only use.
1528///
1529/// Note that this both deletes the nodes and removes them from the worklist.
1530/// It also adds any nodes who have had a user deleted to the worklist as they
1531/// may now have only one use and subject to other combines.
1532bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1533 if (!N->use_empty())
1534 return false;
1535
1537 Nodes.insert(N);
1538 do {
1539 N = Nodes.pop_back_val();
1540 if (!N)
1541 continue;
1542
1543 if (N->use_empty()) {
1544 for (const SDValue &ChildN : N->op_values())
1545 Nodes.insert(ChildN.getNode());
1546
1547 removeFromWorklist(N);
1548 DAG.DeleteNode(N);
1549 } else {
1550 AddToWorklist(N);
1551 }
1552 } while (!Nodes.empty());
1553 return true;
1554}
1555
1556//===----------------------------------------------------------------------===//
1557// Main DAG Combiner implementation
1558//===----------------------------------------------------------------------===//
1559
1560void DAGCombiner::Run(CombineLevel AtLevel) {
1561 // set the instance variables, so that the various visit routines may use it.
1562 Level = AtLevel;
1563 LegalOperations = Level >= AfterLegalizeVectorOps;
1564 LegalTypes = Level >= AfterLegalizeTypes;
1565
1566 WorklistInserter AddNodes(*this);
1567
1568 // Add all the dag nodes to the worklist.
1569 for (SDNode &Node : DAG.allnodes())
1570 AddToWorklist(&Node);
1571
1572 // Create a dummy node (which is not added to allnodes), that adds a reference
1573 // to the root node, preventing it from being deleted, and tracking any
1574 // changes of the root.
1575 HandleSDNode Dummy(DAG.getRoot());
1576
1577 // While we have a valid worklist entry node, try to combine it.
1578 while (SDNode *N = getNextWorklistEntry()) {
1579 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1580 // N is deleted from the DAG, since they too may now be dead or may have a
1581 // reduced number of uses, allowing other xforms.
1582 if (recursivelyDeleteUnusedNodes(N))
1583 continue;
1584
1585 WorklistRemover DeadNodes(*this);
1586
1587 // If this combine is running after legalizing the DAG, re-legalize any
1588 // nodes pulled off the worklist.
1589 if (Level == AfterLegalizeDAG) {
1590 SmallSetVector<SDNode *, 16> UpdatedNodes;
1591 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1592
1593 for (SDNode *LN : UpdatedNodes) {
1594 AddToWorklist(LN);
1595 AddUsersToWorklist(LN);
1596 }
1597 if (!NIsValid)
1598 continue;
1599 }
1600
1601 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1602
1603 // Add any operands of the new node which have not yet been combined to the
1604 // worklist as well. Because the worklist uniques things already, this
1605 // won't repeatedly process the same operand.
1606 CombinedNodes.insert(N);
1607 for (const SDValue &ChildN : N->op_values())
1608 if (!CombinedNodes.count(ChildN.getNode()))
1609 AddToWorklist(ChildN.getNode());
1610
1611 SDValue RV = combine(N);
1612
1613 if (!RV.getNode())
1614 continue;
1615
1616 ++NodesCombined;
1617
1618 // If we get back the same node we passed in, rather than a new node or
1619 // zero, we know that the node must have defined multiple values and
1620 // CombineTo was used. Since CombineTo takes care of the worklist
1621 // mechanics for us, we have no work to do in this case.
1622 if (RV.getNode() == N)
1623 continue;
1624
1625 assert(N->getOpcode() != ISD::DELETED_NODE &&
1626 RV.getOpcode() != ISD::DELETED_NODE &&
1627 "Node was deleted but visit returned new node!");
1628
1629 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1630
1631 if (N->getNumValues() == RV.getNode()->getNumValues())
1632 DAG.ReplaceAllUsesWith(N, RV.getNode());
1633 else {
1634 assert(N->getValueType(0) == RV.getValueType() &&
1635 N->getNumValues() == 1 && "Type mismatch");
1636 DAG.ReplaceAllUsesWith(N, &RV);
1637 }
1638
1639 // Push the new node and any users onto the worklist
1640 AddToWorklist(RV.getNode());
1641 AddUsersToWorklist(RV.getNode());
1642
1643 // Finally, if the node is now dead, remove it from the graph. The node
1644 // may not be dead if the replacement process recursively simplified to
1645 // something else needing this node. This will also take care of adding any
1646 // operands which have lost a user to the worklist.
1647 recursivelyDeleteUnusedNodes(N);
1648 }
1649
1650 // If the root changed (e.g. it was a dead load, update the root).
1651 DAG.setRoot(Dummy.getValue());
1652 DAG.RemoveDeadNodes();
1653}
1654
1656 switch (N->getOpcode()) {
1657 default: break;
1658 case ISD::TokenFactor: return visitTokenFactor(N);
1659 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1660 case ISD::ADD: return visitADD(N);
1661 case ISD::SUB: return visitSUB(N);
1662 case ISD::SADDSAT:
1663 case ISD::UADDSAT: return visitADDSAT(N);
1664 case ISD::SSUBSAT:
1665 case ISD::USUBSAT: return visitSUBSAT(N);
1666 case ISD::ADDC: return visitADDC(N);
1667 case ISD::SADDO:
1668 case ISD::UADDO: return visitADDO(N);
1669 case ISD::SUBC: return visitSUBC(N);
1670 case ISD::SSUBO:
1671 case ISD::USUBO: return visitSUBO(N);
1672 case ISD::ADDE: return visitADDE(N);
1673 case ISD::ADDCARRY: return visitADDCARRY(N);
1674 case ISD::SUBE: return visitSUBE(N);
1675 case ISD::SUBCARRY: return visitSUBCARRY(N);
1676 case ISD::MUL: return visitMUL(N);
1677 case ISD::SDIV: return visitSDIV(N);
1678 case ISD::UDIV: return visitUDIV(N);
1679 case ISD::SREM:
1680 case ISD::UREM: return visitREM(N);
1681 case ISD::MULHU: return visitMULHU(N);
1682 case ISD::MULHS: return visitMULHS(N);
1683 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1684 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1685 case ISD::SMULO:
1686 case ISD::UMULO: return visitMULO(N);
1687 case ISD::SMIN:
1688 case ISD::SMAX:
1689 case ISD::UMIN:
1690 case ISD::UMAX: return visitIMINMAX(N);
1691 case ISD::AND: return visitAND(N);
1692 case ISD::OR: return visitOR(N);
1693 case ISD::XOR: return visitXOR(N);
1694 case ISD::SHL: return visitSHL(N);
1695 case ISD::SRA: return visitSRA(N);
1696 case ISD::SRL: return visitSRL(N);
1697 case ISD::ROTR:
1698 case ISD::ROTL: return visitRotate(N);
1699 case ISD::FSHL:
1700 case ISD::FSHR: return visitFunnelShift(N);
1701 case ISD::ABS: return visitABS(N);
1702 case ISD::BSWAP: return visitBSWAP(N);
1703 case ISD::BITREVERSE: return visitBITREVERSE(N);
1704 case ISD::CTLZ: return visitCTLZ(N);
1705 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1706 case ISD::CTTZ: return visitCTTZ(N);
1707 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1708 case ISD::CTPOP: return visitCTPOP(N);
1709 case ISD::SELECT: return visitSELECT(N);
1710 case ISD::VSELECT: return visitVSELECT(N);
1711 case ISD::SELECT_CC: return visitSELECT_CC(N);
1712 case ISD::SETCC: return visitSETCC(N);
1713 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1714 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1715 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1716 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1717 case ISD::AssertSext:
1718 case ISD::AssertZext: return visitAssertExt(N);
1719 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1720 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1721 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1722 case ISD::TRUNCATE: return visitTRUNCATE(N);
1723 case ISD::BITCAST: return visitBITCAST(N);
1724 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1725 case ISD::FADD: return visitFADD(N);
1726 case ISD::FSUB: return visitFSUB(N);
1727 case ISD::FMUL: return visitFMUL(N);
1728 case ISD::FMA: return visitFMA(N);
1729 case ISD::FDIV: return visitFDIV(N);
1730 case ISD::FREM: return visitFREM(N);
1731 case ISD::FSQRT: return visitFSQRT(N);
1732 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1733 case ISD::FPOW: return visitFPOW(N);
1734 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1735 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1736 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1737 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1738 case ISD::FP_ROUND: return visitFP_ROUND(N);
1739 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1740 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1741 case ISD::FNEG: return visitFNEG(N);
1742 case ISD::FABS: return visitFABS(N);
1743 case ISD::FFLOOR: return visitFFLOOR(N);
1744 case ISD::FMINNUM: return visitFMINNUM(N);
1745 case ISD::FMAXNUM: return visitFMAXNUM(N);
1746 case ISD::FMINIMUM: return visitFMINIMUM(N);
1747 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1748 case ISD::FCEIL: return visitFCEIL(N);
1749 case ISD::FTRUNC: return visitFTRUNC(N);
1750 case ISD::BRCOND: return visitBRCOND(N);
1751 case ISD::BR_CC: return visitBR_CC(N);
1752 case ISD::LOAD: return visitLOAD(N);
1753 case ISD::STORE: return visitSTORE(N);
1754 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1755 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1756 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1757 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1758 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1759 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1760 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1761 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1762 case ISD::MGATHER: return visitMGATHER(N);
1763 case ISD::MLOAD: return visitMLOAD(N);
1764 case ISD::MSCATTER: return visitMSCATTER(N);
1765 case ISD::MSTORE: return visitMSTORE(N);
1766 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1767 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1768 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1771 case ISD::VECREDUCE_ADD:
1772 case ISD::VECREDUCE_MUL:
1773 case ISD::VECREDUCE_AND:
1774 case ISD::VECREDUCE_OR:
1775 case ISD::VECREDUCE_XOR:
1781 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1782 }
1783 return SDValue();
1784}
1785
1787 SDValue RV = visit(N);
1788
1789 // If nothing happened, try a target-specific DAG combine.
1790 if (!RV.getNode()) {
1791 assert(N->getOpcode() != ISD::DELETED_NODE &&
1792 "Node was deleted but visit returned NULL!");
1793
1794 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1795 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1796
1797 // Expose the DAG combiner to the target combiner impls.
1799 DagCombineInfo(DAG, Level, false, this);
1800
1801 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1802 }
1803 }
1804
1805 // If nothing happened still, try promoting the operation.
1806 if (!RV.getNode()) {
1807 switch (N->getOpcode()) {
1808 default: break;
1809 case ISD::ADD:
1810 case ISD::SUB:
1811 case ISD::MUL:
1812 case ISD::AND:
1813 case ISD::OR:
1814 case ISD::XOR:
1815 RV = PromoteIntBinOp(SDValue(N, 0));
1816 break;
1817 case ISD::SHL:
1818 case ISD::SRA:
1819 case ISD::SRL:
1820 RV = PromoteIntShiftOp(SDValue(N, 0));
1821 break;
1822 case ISD::SIGN_EXTEND:
1823 case ISD::ZERO_EXTEND:
1824 case ISD::ANY_EXTEND:
1825 RV = PromoteExtend(SDValue(N, 0));
1826 break;
1827 case ISD::LOAD:
1828 if (PromoteLoad(SDValue(N, 0)))
1829 RV = SDValue(N, 0);
1830 break;
1831 }
1832 }
1833
1834 // If N is a commutative binary node, try to eliminate it if the commuted
1835 // version is already present in the DAG.
1836 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1837 N->getNumValues() == 1) {
1838 SDValue N0 = N->getOperand(0);
1839 SDValue N1 = N->getOperand(1);
1840
1841 // Constant operands are canonicalized to RHS.
1842 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1843 SDValue Ops[] = {N1, N0};
1844 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1845 N->getFlags());
1846 if (CSENode)
1847 return SDValue(CSENode, 0);
1848 }
1849 }
1850
1851 return RV;
1852}
1853
1854/// Given a node, return its input chain if it has one, otherwise return a null
1855/// sd operand.
1857 if (unsigned NumOps = N->getNumOperands()) {
1858 if (N->getOperand(0).getValueType() == MVT::Other)
1859 return N->getOperand(0);
1860 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1861 return N->getOperand(NumOps-1);
1862 for (unsigned i = 1; i < NumOps-1; ++i)
1863 if (N->getOperand(i).getValueType() == MVT::Other)
1864 return N->getOperand(i);
1865 }
1866 return SDValue();
1867}
1868
1869SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1870 // If N has two operands, where one has an input chain equal to the other,
1871 // the 'other' chain is redundant.
1872 if (N->getNumOperands() == 2) {
1873 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1874 return N->getOperand(0);
1875 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1876 return N->getOperand(1);
1877 }
1878
1879 // Don't simplify token factors if optnone.
1880 if (OptLevel == CodeGenOpt::None)
1881 return SDValue();
1882
1883 // If the sole user is a token factor, we should make sure we have a
1884 // chance to merge them together. This prevents TF chains from inhibiting
1885 // optimizations.
1886 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1887 AddToWorklist(*(N->use_begin()));
1888
1889 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1890 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1892 bool Changed = false; // If we should replace this token factor.
1893
1894 // Start out with this token factor.
1895 TFs.push_back(N);
1896
1897 // Iterate through token factors. The TFs grows when new token factors are
1898 // encountered.
1899 for (unsigned i = 0; i < TFs.size(); ++i) {
1900 // Limit number of nodes to inline, to avoid quadratic compile times.
1901 // We have to add the outstanding Token Factors to Ops, otherwise we might
1902 // drop Ops from the resulting Token Factors.
1903 if (Ops.size() > TokenFactorInlineLimit) {
1904 for (unsigned j = i; j < TFs.size(); j++)
1905 Ops.emplace_back(TFs[j], 0);
1906 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1907 // combiner worklist later.
1908 TFs.resize(i);
1909 break;
1910 }
1911
1912 SDNode *TF = TFs[i];
1913 // Check each of the operands.
1914 for (const SDValue &Op : TF->op_values()) {
1915 switch (Op.getOpcode()) {
1916 case ISD::EntryToken:
1917 // Entry tokens don't need to be added to the list. They are
1918 // redundant.
1919 Changed = true;
1920 break;
1921
1922 case ISD::TokenFactor:
1923 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1924 // Queue up for processing.
1925 TFs.push_back(Op.getNode());
1926 Changed = true;
1927 break;
1928 }
1930
1931 default:
1932 // Only add if it isn't already in the list.
1933 if (SeenOps.insert(Op.getNode()).second)
1934 Ops.push_back(Op);
1935 else
1936 Changed = true;
1937 break;
1938 }
1939 }
1940 }
1941
1942 // Re-visit inlined Token Factors, to clean them up in case they have been
1943 // removed. Skip the first Token Factor, as this is the current node.
1944 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1945 AddToWorklist(TFs[i]);
1946
1947 // Remove Nodes that are chained to another node in the list. Do so
1948 // by walking up chains breath-first stopping when we've seen
1949 // another operand. In general we must climb to the EntryNode, but we can exit
1950 // early if we find all remaining work is associated with just one operand as
1951 // no further pruning is possible.
1952
1953 // List of nodes to search through and original Ops from which they originate.
1955 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1956 SmallPtrSet<SDNode *, 16> SeenChains;
1957 bool DidPruneOps = false;
1958
1959 unsigned NumLeftToConsider = 0;
1960 for (const SDValue &Op : Ops) {
1961 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1962 OpWorkCount.push_back(1);
1963 }
1964
1965 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1966 // If this is an Op, we can remove the op from the list. Remark any
1967 // search associated with it as from the current OpNumber.
1968 if (SeenOps.count(Op) != 0) {
1969 Changed = true;
1970 DidPruneOps = true;
1971 unsigned OrigOpNumber = 0;
1972 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1973 OrigOpNumber++;
1974 assert((OrigOpNumber != Ops.size()) &&
1975 "expected to find TokenFactor Operand");
1976 // Re-mark worklist from OrigOpNumber to OpNumber
1977 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1978 if (Worklist[i].second == OrigOpNumber) {
1979 Worklist[i].second = OpNumber;
1980 }
1981 }
1982 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1983 OpWorkCount[OrigOpNumber] = 0;
1984 NumLeftToConsider--;
1985 }
1986 // Add if it's a new chain
1987 if (SeenChains.insert(Op).second) {
1988 OpWorkCount[OpNumber]++;
1989 Worklist.push_back(std::make_pair(Op, OpNumber));
1990 }
1991 };
1992
1993 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1994 // We need at least be consider at least 2 Ops to prune.
1995 if (NumLeftToConsider <= 1)
1996 break;
1997 auto CurNode = Worklist[i].first;
1998 auto CurOpNumber = Worklist[i].second;
1999 assert((OpWorkCount[CurOpNumber] > 0) &&
2000 "Node should not appear in worklist");
2001 switch (CurNode->getOpcode()) {
2002 case ISD::EntryToken:
2003 // Hitting EntryToken is the only way for the search to terminate without
2004 // hitting
2005 // another operand's search. Prevent us from marking this operand
2006 // considered.
2007 NumLeftToConsider++;
2008 break;
2009 case ISD::TokenFactor:
2010 for (const SDValue &Op : CurNode->op_values())
2011 AddToWorklist(i, Op.getNode(), CurOpNumber);
2012 break;
2014 case ISD::LIFETIME_END:
2015 case ISD::CopyFromReg:
2016 case ISD::CopyToReg:
2017 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2018 break;
2019 default:
2020 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2021 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2022 break;
2023 }
2024 OpWorkCount[CurOpNumber]--;
2025 if (OpWorkCount[CurOpNumber] == 0)
2026 NumLeftToConsider--;
2027 }
2028
2029 // If we've changed things around then replace token factor.
2030 if (Changed) {
2032 if (Ops.empty()) {
2033 // The entry token is the only possible outcome.
2034 Result = DAG.getEntryNode();
2035 } else {
2036 if (DidPruneOps) {
2037 SmallVector<SDValue, 8> PrunedOps;
2038 //
2039 for (const SDValue &Op : Ops) {
2040 if (SeenChains.count(Op.getNode()) == 0)
2041 PrunedOps.push_back(Op);
2042 }
2043 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2044 } else {
2045 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2046 }
2047 }
2048 return Result;
2049 }
2050 return SDValue();
2051}
2052
2053/// MERGE_VALUES can always be eliminated.
2054SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2055 WorklistRemover DeadNodes(*this);
2056 // Replacing results may cause a different MERGE_VALUES to suddenly
2057 // be CSE'd with N, and carry its uses with it. Iterate until no
2058 // uses remain, to ensure that the node can be safely deleted.
2059 // First add the users of this node to the work list so that they
2060 // can be tried again once they have new operands.
2061 AddUsersToWorklist(N);
2062 do {
2063 // Do as a single replacement to avoid rewalking use lists.
2065 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2066 Ops.push_back(N->getOperand(i));
2067 DAG.ReplaceAllUsesWith(N, Ops.data());
2068 } while (!N->use_empty());
2069 deleteAndRecombine(N);
2070 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2071}
2072
2073/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2074/// ConstantSDNode pointer else nullptr.
2076 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2077 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2078}
2079
2080SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2081 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2082 "Unexpected binary operator");
2083
2084 // Don't do this unless the old select is going away. We want to eliminate the
2085 // binary operator, not replace a binop with a select.
2086 // TODO: Handle ISD::SELECT_CC.
2087 unsigned SelOpNo = 0;
2088 SDValue Sel = BO->getOperand(0);
2089 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2090 SelOpNo = 1;
2091 Sel = BO->getOperand(1);
2092 }
2093
2094 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2095 return SDValue();
2096
2097 SDValue CT = Sel.getOperand(1);
2098 if (!isConstantOrConstantVector(CT, true) &&
2100 return SDValue();
2101
2102 SDValue CF = Sel.getOperand(2);
2103 if (!isConstantOrConstantVector(CF, true) &&
2105 return SDValue();
2106
2107 // Bail out if any constants are opaque because we can't constant fold those.
2108 // The exception is "and" and "or" with either 0 or -1 in which case we can
2109 // propagate non constant operands into select. I.e.:
2110 // and (select Cond, 0, -1), X --> select Cond, 0, X
2111 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2112 auto BinOpcode = BO->getOpcode();
2113 bool CanFoldNonConst =
2114 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2117
2118 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2119 if (!CanFoldNonConst &&
2120 !isConstantOrConstantVector(CBO, true) &&
2122 return SDValue();
2123
2124 EVT VT = Sel.getValueType();
2125
2126 // In case of shift value and shift amount may have different VT. For instance
2127 // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
2128 // swapped operands and value types do not match. NB: x86 is fine if operands
2129 // are not swapped with shift amount VT being not bigger than shifted value.
2130 // TODO: that is possible to check for a shift operation, correct VTs and
2131 // still perform optimization on x86 if needed.
2132 if (SelOpNo && VT != CBO.getValueType())
2133 return SDValue();
2134
2135 // We have a select-of-constants followed by a binary operator with a
2136 // constant. Eliminate the binop by pulling the constant math into the select.
2137 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2138 SDLoc DL(Sel);
2139 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2140 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2141 if (!CanFoldNonConst && !NewCT.isUndef() &&
2142 !isConstantOrConstantVector(NewCT, true) &&
2144 return SDValue();
2145
2146 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2147 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2148 if (!CanFoldNonConst && !NewCF.isUndef() &&
2149 !isConstantOrConstantVector(NewCF, true) &&
2151 return SDValue();
2152
2153 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2154 SelectOp->setFlags(BO->getFlags());
2155 return SelectOp;
2156}
2157
2159 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2160 "Expecting add or sub");
2161
2162 // Match a constant operand and a zext operand for the math instruction:
2163 // add Z, C
2164 // sub C, Z
2165 bool IsAdd = N->getOpcode() == ISD::ADD;
2166 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2167 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2168 auto *CN = dyn_cast<ConstantSDNode>(C);
2169 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2170 return SDValue();
2171
2172 // Match the zext operand as a setcc of a boolean.
2173 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2174 Z.getOperand(0).getValueType() != MVT::i1)
2175 return SDValue();
2176
2177 // Match the compare as: setcc (X & 1), 0, eq.
2178 SDValue SetCC = Z.getOperand(0);
2179 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2180 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2181 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2182 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2183 return SDValue();
2184
2185 // We are adding/subtracting a constant and an inverted low bit. Turn that
2186 // into a subtract/add of the low bit with incremented/decremented constant:
2187 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2188 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2189 EVT VT = C.getValueType();
2190 SDLoc DL(N);
2191 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2192 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2193 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2194 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2195}
2196
2197/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2198/// a shift and add with a different constant.
2200 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2201 "Expecting add or sub");
2202
2203 // We need a constant operand for the add/sub, and the other operand is a
2204 // logical shift right: add (srl), C or sub C, (srl).
2205 // TODO - support non-uniform vector amounts.
2206 bool IsAdd = N->getOpcode() == ISD::ADD;
2207 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2208 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2209 ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2210 if (!C || ShiftOp.getOpcode() != ISD::SRL)
2211 return SDValue();
2212
2213 // The shift must be of a 'not' value.
2214 SDValue Not = ShiftOp.getOperand(0);
2215 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2216 return SDValue();
2217
2218 // The shift must be moving the sign bit to the least-significant-bit.
2219 EVT VT = ShiftOp.getValueType();
2220 SDValue ShAmt = ShiftOp.getOperand(1);
2221 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2222 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2223 return SDValue();
2224
2225 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2226 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2227 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2228 SDLoc DL(N);
2229 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2230 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2231 APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2232 return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2233}
2234
2235/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2236/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2237/// are no common bits set in the operands).
2238SDValue DAGCombiner::visitADDLike(SDNode *N) {
2239 SDValue N0 = N->getOperand(0);
2240 SDValue N1 = N->getOperand(1);
2241 EVT VT = N0.getValueType();
2242 SDLoc DL(N);
2243
2244 // fold vector ops
2245 if (VT.isVector()) {
2246 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2247 return FoldedVOp;
2248
2249 // fold (add x, 0) -> x, vector edition
2251 return N0;
2253 return N1;
2254 }
2255
2256 // fold (add x, undef) -> undef
2257 if (N0.isUndef())
2258 return N0;
2259
2260 if (N1.isUndef())
2261 return N1;
2262
2264 // canonicalize constant to RHS
2266 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2267 // fold (add c1, c2) -> c1+c2
2268 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2269 N1.getNode());
2270 }
2271
2272 // fold (add x, 0) -> x
2273 if (isNullConstant(N1))
2274 return N0;
2275
2276 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2277 // fold ((A-c1)+c2) -> (A+(c2-c1))
2278 if (N0.getOpcode() == ISD::SUB &&
2279 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2280 SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2281 N0.getOperand(1).getNode());
2282 assert(Sub && "Constant folding failed");
2283 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2284 }
2285
2286 // fold ((c1-A)+c2) -> (c1+c2)-A
2287 if (N0.getOpcode() == ISD::SUB &&
2288 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2290 N0.getOperand(0).getNode());
2291 assert(Add && "Constant folding failed");
2292 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2293 }
2294
2295 // add (sext i1 X), 1 -> zext (not i1 X)
2296 // We don't transform this pattern:
2297 // add (zext i1 X), -1 -> sext (not i1 X)
2298 // because most (?) targets generate better code for the zext form.
2299 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2300 isOneOrOneSplat(N1)) {
2301 SDValue X = N0.getOperand(0);
2302 if ((!LegalOperations ||
2303 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2305 X.getScalarValueSizeInBits() == 1) {
2306 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2307 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2308 }
2309 }
2310
2311 // Undo the add -> or combine to merge constant offsets from a frame index.
2312 if (N0.getOpcode() == ISD::OR &&
2313 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2314 isa<ConstantSDNode>(N0.getOperand(1)) &&
2315 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2316 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2317 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2318 }
2319 }
2320
2321 if (SDValue NewSel = foldBinOpIntoSelect(N))
2322 return NewSel;
2323
2324 // reassociate add
2325 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2326 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2327 return RADD;
2328 }
2329 // fold ((0-A) + B) -> B-A
2330 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2331 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2332
2333 // fold (A + (0-B)) -> A-B
2334 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2335 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2336
2337 // fold (A+(B-A)) -> B
2338 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2339 return N1.getOperand(0);
2340
2341 // fold ((B-A)+A) -> B
2342 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2343 return N0.getOperand(0);
2344
2345 // fold ((A-B)+(C-A)) -> (C-B)
2346 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2347 N0.getOperand(0) == N1.getOperand(1))
2348 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2349 N0.getOperand(1));
2350
2351 // fold ((A-B)+(B-C)) -> (A-C)
2352 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2353 N0.getOperand(1) == N1.getOperand(0))
2354 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2355 N1.getOperand(1));
2356
2357 // fold (A+(B-(A+C))) to (B-C)
2358 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2359 N0 == N1.getOperand(1).getOperand(0))
2360 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2361 N1.getOperand(1).getOperand(1));
2362
2363 // fold (A+(B-(C+A))) to (B-C)
2364 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2365 N0 == N1.getOperand(1).getOperand(1))
2366 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2367 N1.getOperand(1).getOperand(0));
2368
2369 // fold (A+((B-A)+or-C)) to (B+or-C)
2370 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2371 N1.getOperand(0).getOpcode() == ISD::SUB &&
2372 N0 == N1.getOperand(0).getOperand(1))
2373 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2374 N1.getOperand(1));
2375
2376 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2378 SDValue N00 = N0.getOperand(0);
2379 SDValue N01 = N0.getOperand(1);
2380 SDValue N10 = N1.getOperand(0);
2381 SDValue N11 = N1.getOperand(1);
2382
2384 return DAG.getNode(ISD::SUB, DL, VT,
2385 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2386 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2387 }
2388
2389 // fold (add (umax X, C), -C) --> (usubsat X, C)
2390 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2391 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2392 return (!Max && !Op) ||
2393 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2394 };
2395 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2396 /*AllowUndefs*/ true))
2397 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2398 N0.getOperand(1));
2399 }
2400
2402 return SDValue(N, 0);
2403
2404 if (isOneOrOneSplat(N1)) {
2405 // fold (add (xor a, -1), 1) -> (sub 0, a)
2406 if (isBitwiseNot(N0))
2407 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2408 N0.getOperand(0));
2409
2410 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2411 if (N0.getOpcode() == ISD::ADD ||
2412 N0.getOpcode() == ISD::UADDO ||
2413 N0.getOpcode() == ISD::SADDO) {
2414 SDValue A, Xor;
2415
2416 if (isBitwiseNot(N0.getOperand(0))) {
2417 A = N0.getOperand(1);
2418 Xor = N0.getOperand(0);
2419 } else if (isBitwiseNot(N0.getOperand(1))) {
2420 A = N0.getOperand(0);
2421 Xor = N0.getOperand(1);
2422 }
2423
2424 if (Xor)
2425 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2426 }
2427
2428 // Look for:
2429 // add (add x, y), 1
2430 // And if the target does not like this form then turn into:
2431 // sub y, (xor x, -1)
2432 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2433 N0.getOpcode() == ISD::ADD) {
2434 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2435 DAG.getAllOnesConstant(DL, VT));
2436 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2437 }
2438 }
2439
2440 // (x - y) + -1 -> add (xor y, -1), x
2441 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2443 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2444 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2445 }
2446
2447 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2448 return Combined;
2449
2450 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2451 return Combined;
2452
2453 return SDValue();
2454}
2455
2456SDValue DAGCombiner::visitADD(SDNode *N) {
2457 SDValue N0 = N->getOperand(0);
2458 SDValue N1 = N->getOperand(1);
2459 EVT VT = N0.getValueType();
2460 SDLoc DL(N);
2461
2462 if (SDValue Combined = visitADDLike(N))
2463 return Combined;
2464
2465 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2466 return V;
2467
2468 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2469 return V;
2470
2471 // fold (a+b) -> (a|b) iff a and b share no bits.
2472 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2473 DAG.haveNoCommonBitsSet(N0, N1))
2474 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2475
2476 return SDValue();
2477}
2478
2479SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2480 unsigned Opcode = N->getOpcode();
2481 SDValue N0 = N->getOperand(0);
2482 SDValue N1 = N->getOperand(1);
2483 EVT VT = N0.getValueType();
2484 SDLoc DL(N);
2485
2486 // fold vector ops
2487 if (VT.isVector()) {
2488 // TODO SimplifyVBinOp
2489
2490 // fold (add_sat x, 0) -> x, vector edition
2492 return N0;
2494 return N1;
2495 }
2496
2497 // fold (add_sat x, undef) -> -1
2498 if (N0.isUndef() || N1.isUndef())
2499 return DAG.getAllOnesConstant(DL, VT);
2500
2502 // canonicalize constant to RHS
2504 return DAG.getNode(Opcode, DL, VT, N1, N0);
2505 // fold (add_sat c1, c2) -> c3
2506 return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2507 N1.getNode());
2508 }
2509
2510 // fold (add_sat x, 0) -> x
2511 if (isNullConstant(N1))
2512 return N0;
2513
2514 // If it cannot overflow, transform into an add.
2515 if (Opcode == ISD::UADDSAT)
2517 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2518
2519 return SDValue();
2520}
2521
2523 bool Masked = false;
2524
2525 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2526 while (true) {
2527 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2528 V = V.getOperand(0);
2529 continue;
2530 }
2531
2532 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2533 Masked = true;
2534 V = V.getOperand(0);
2535 continue;
2536 }
2537
2538 break;
2539 }
2540
2541 // If this is not a carry, return.
2542 if (V.getResNo() != 1)
2543 return SDValue();
2544
2545 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2546 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2547 return SDValue();
2548
2549 EVT VT = V.getNode()->getValueType(0);
2550 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2551 return SDValue();
2552
2553 // If the result is masked, then no matter what kind of bool it is we can
2554 // return. If it isn't, then we need to make sure the bool type is either 0 or
2555 // 1 and not other values.
2556 if (Masked ||
2559 return V;
2560
2561 return SDValue();
2562}
2563
2564/// Given the operands of an add/sub operation, see if the 2nd operand is a
2565/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2566/// the opcode and bypass the mask operation.
2567static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2568 SelectionDAG &DAG, const SDLoc &DL) {
2569 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2570 return SDValue();
2571
2572 EVT VT = N0.getValueType();
2573 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2574 return SDValue();
2575
2576 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2577 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2578 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2579}
2580
2581/// Helper for doing combines based on N0 and N1 being added to each other.
2582SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2583 SDNode *LocReference) {
2584 EVT VT = N0.getValueType();
2585 SDLoc DL(LocReference);
2586
2587 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2588 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2590 return DAG.getNode(ISD::SUB, DL, VT, N0,
2591 DAG.getNode(ISD::SHL, DL, VT,
2592 N1.getOperand(0).getOperand(1),
2593 N1.getOperand(1)));
2594
2595 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2596 return V;
2597
2598 // Look for:
2599 // add (add x, 1), y
2600 // And if the target does not like this form then turn into:
2601 // sub y, (xor x, -1)
2602 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2603 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2604 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2605 DAG.getAllOnesConstant(DL, VT));
2606 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2607 }
2608
2609 // Hoist one-use subtraction by non-opaque constant:
2610 // (x - C) + y -> (x + y) - C
2611 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2612 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2613 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2614 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2615 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2616 }
2617 // Hoist one-use subtraction from non-opaque constant:
2618 // (C - x) + y -> (y - x) + C
2619 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2620 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2621 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2622 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2623 }
2624
2625 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2626 // rather than 'add 0/-1' (the zext should get folded).
2627 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2628 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2629 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2631 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2632 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2633 }
2634
2635 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2636 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2637 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2638 if (TN->getVT() == MVT::i1) {
2639 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2640 DAG.getConstant(1, DL, VT));
2641 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2642 }
2643 }
2644
2645 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2646 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2647 N1.getResNo() == 0)
2648 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2649 N0, N1.getOperand(0), N1.getOperand(2));
2650
2651 // (add X, Carry) -> (addcarry X, 0, Carry)
2653 if (SDValue Carry = getAsCarry(TLI, N1))
2654 return DAG.getNode(ISD::ADDCARRY, DL,
2655 DAG.getVTList(VT, Carry.getValueType()), N0,
2656 DAG.getConstant(0, DL, VT), Carry);
2657
2658 return SDValue();
2659}
2660
2661SDValue DAGCombiner::visitADDC(SDNode *N) {
2662 SDValue N0 = N->getOperand(0);
2663 SDValue N1 = N->getOperand(1);
2664 EVT VT = N0.getValueType();
2665 SDLoc DL(N);
2666
2667 // If the flag result is dead, turn this into an ADD.
2668 if (!N->hasAnyUseOfValue(1))
2669 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2671
2672 // canonicalize constant to RHS.
2673 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2674 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2675 if (N0C && !N1C)
2676 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2677
2678 // fold (addc x, 0) -> x + no carry out
2679 if (isNullConstant(N1))
2680 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2681 DL, MVT::Glue));
2682
2683 // If it cannot overflow, transform into an add.
2685 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2687
2688 return SDValue();
2689}
2690
2691static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2692 SelectionDAG &DAG, const TargetLowering &TLI) {
2693 EVT VT = V.getValueType();
2694
2695 SDValue Cst;
2696 switch (TLI.getBooleanContents(VT)) {
2699 Cst = DAG.getConstant(1, DL, VT);
2700 break;
2702 Cst = DAG.getAllOnesConstant(DL, VT);
2703 break;
2704 }
2705
2706 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2707}
2708
2709/**
2710 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2711 * then the flip also occurs if computing the inverse is the same cost.
2712 * This function returns an empty SDValue in case it cannot flip the boolean
2713 * without increasing the cost of the computation. If you want to flip a boolean
2714 * no matter what, use flipBoolean.
2715 */
2717 const TargetLowering &TLI,
2718 bool Force) {
2719 if (Force && isa<ConstantSDNode>(V))
2720 return flipBoolean(V, SDLoc(V), DAG, TLI);
2721
2722 if (V.getOpcode() != ISD::XOR)
2723 return SDValue();
2724
2725 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2726 if (!Const)
2727 return SDValue();
2728
2729 EVT VT = V.getValueType();
2730
2731 bool IsFlip = false;
2732 switch(TLI.getBooleanContents(VT)) {
2734 IsFlip = Const->isOne();
2735 break;
2737 IsFlip = Const->isAllOnesValue();
2738 break;
2740 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2741 break;
2742 }
2743
2744 if (IsFlip)
2745 return V.getOperand(0);
2746 if (Force)
2747 return flipBoolean(V, SDLoc(V), DAG, TLI);
2748 return SDValue();
2749}
2750
2751SDValue DAGCombiner::visitADDO(SDNode *N) {
2752 SDValue N0 = N->getOperand(0);
2753 SDValue N1 = N->getOperand(1);
2754 EVT VT = N0.getValueType();
2755 bool IsSigned = (ISD::SADDO == N->getOpcode());
2756
2757 EVT CarryVT = N->getValueType(1);
2758 SDLoc DL(N);
2759
2760 // If the flag result is dead, turn this into an ADD.
2761 if (!N->hasAnyUseOfValue(1))
2762 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2763 DAG.getUNDEF(CarryVT));
2764
2765 // canonicalize constant to RHS.
2768 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2769
2770 // fold (addo x, 0) -> x + no carry out
2771 if (isNullOrNullSplat(N1))
2772 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2773
2774 if (!IsSigned) {
2775 // If it cannot overflow, transform into an add.
2777 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2778 DAG.getConstant(0, DL, CarryVT));
2779
2780 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2781 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2782 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2783 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2784 return CombineTo(N, Sub,
2785 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2786 }
2787
2788 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2789 return Combined;
2790
2791 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2792 return Combined;
2793 }
2794
2795 return SDValue();
2796}
2797
2798SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2799 EVT VT = N0.getValueType();
2800 if (VT.isVector())
2801 return SDValue();
2802
2803 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2804 // If Y + 1 cannot overflow.
2805 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2806 SDValue Y = N1.getOperand(0);
2807 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2809 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2810 N1.getOperand(2));
2811 }
2812
2813 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2815 if (SDValue Carry = getAsCarry(TLI, N1))
2816 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2817 DAG.getConstant(0, SDLoc(N), VT), Carry);
2818
2819 return SDValue();
2820}
2821
2822SDValue DAGCombiner::visitADDE(SDNode *N) {
2823 SDValue N0 = N->getOperand(0);
2824 SDValue N1 = N->getOperand(1);
2825 SDValue CarryIn = N->getOperand(2);
2826
2827 // canonicalize constant to RHS
2828 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2829 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2830 if (N0C && !N1C)
2831 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2832 N1, N0, CarryIn);
2833
2834 // fold (adde x, y, false) -> (addc x, y)
2835 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2836 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2837
2838 return SDValue();
2839}
2840
2841SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2842 SDValue N0 = N->getOperand(0);
2843 SDValue N1 = N->getOperand(1);
2844 SDValue CarryIn = N->getOperand(2);
2845 SDLoc DL(N);
2846
2847 // canonicalize constant to RHS
2848 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2849 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2850 if (N0C && !N1C)
2851 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2852
2853 // fold (addcarry x, y, false) -> (uaddo x, y)
2854 if (isNullConstant(CarryIn)) {
2855 if (!LegalOperations ||
2856 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2857 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2858 }
2859
2860 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2861 if (isNullConstant(N0) && isNullConstant(N1)) {
2862 EVT VT = N0.getValueType();
2863 EVT CarryVT = CarryIn.getValueType();
2864 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2865 AddToWorklist(CarryExt.getNode());
2866 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2867 DAG.getConstant(1, DL, VT)),
2868 DAG.getConstant(0, DL, CarryVT));
2869 }
2870
2871 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2872 return Combined;
2873
2874 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2875 return Combined;
2876
2877 return SDValue();
2878}
2879
2880/**
2881 * If we are facing some sort of diamond carry propapagtion pattern try to
2882 * break it up to generate something like:
2883 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2884 *
2885 * The end result is usually an increase in operation required, but because the
2886 * carry is now linearized, other tranforms can kick in and optimize the DAG.
2887 *
2888 * Patterns typically look something like
2889 * (uaddo A, B)
2890 * / \
2891 * Carry Sum
2892 * | \
2893 * | (addcarry *, 0, Z)
2894 * | /
2895 * \ Carry
2896 * | /
2897 * (addcarry X, *, *)
2898 *
2899 * But numerous variation exist. Our goal is to identify A, B, X and Z and
2900 * produce a combine with a single path for carry propagation.
2901 */
2903 SDValue X, SDValue Carry0, SDValue Carry1,
2904 SDNode *N) {
2905 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2906 return SDValue();
2907 if (Carry1.getOpcode() != ISD::UADDO)
2908 return SDValue();
2909
2910 SDValue Z;
2911
2912 /**
2913 * First look for a suitable Z. It will present itself in the form of
2914 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2915 */
2916 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2917 isNullConstant(Carry0.getOperand(1))) {
2918 Z = Carry0.getOperand(2);
2919 } else if (Carry0.getOpcode() == ISD::UADDO &&
2920 isOneConstant(Carry0.getOperand(1))) {
2921 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2922 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2923 } else {
2924 // We couldn't find a suitable Z.
2925 return SDValue();
2926 }
2927
2928
2929 auto cancelDiamond = [&](SDValue A,SDValue B) {
2930 SDLoc DL(N);
2931 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2932 Combiner.AddToWorklist(NewY.getNode());
2933 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2934 DAG.getConstant(0, DL, X.getValueType()),
2935 NewY.getValue(1));
2936 };
2937
2938 /**
2939 * (uaddo A, B)
2940 * |
2941 * Sum
2942 * |
2943 * (addcarry *, 0, Z)
2944 */
2945 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2946 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2947 }
2948
2949 /**
2950 * (addcarry A, 0, Z)
2951 * |
2952 * Sum
2953 * |
2954 * (uaddo *, B)
2955 */
2956 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2957 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2958 }
2959
2960 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2961 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2962 }
2963
2964 return SDValue();
2965}
2966
2967SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2968 SDNode *N) {
2969 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2970 if (isBitwiseNot(N0))
2971 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2972 SDLoc DL(N);
2973 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2974 N0.getOperand(0), NotC);
2975 return CombineTo(N, Sub,
2976 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2977 }
2978
2979 // Iff the flag result is dead:
2980 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2981 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2982 // or the dependency between the instructions.
2983 if ((N0.getOpcode() == ISD::ADD ||
2984 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2985 N0.getValue(1) != CarryIn)) &&
2986 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2987 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2988 N0.getOperand(0), N0.getOperand(1), CarryIn);
2989
2990 /**
2991 * When one of the addcarry argument is itself a carry, we may be facing
2992 * a diamond carry propagation. In which case we try to transform the DAG
2993 * to ensure linear carry propagation if that is possible.
2994 */
2995 if (auto Y = getAsCarry(TLI, N1)) {
2996 // Because both are carries, Y and Z can be swapped.
2997 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2998 return R;
2999 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3000 return R;
3001 }
3002
3003 return SDValue();
3004}
3005
3006// Since it may not be valid to emit a fold to zero for vector initializers
3007// check if we can before folding.
3008static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3009 SelectionDAG &DAG, bool LegalOperations) {
3010 if (!VT.isVector())
3011 return DAG.getConstant(0, DL, VT);
3012 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3013 return DAG.getConstant(0, DL, VT);
3014 return SDValue();
3015}
3016
3017SDValue DAGCombiner::visitSUB(SDNode *N) {
3018 SDValue N0 = N->getOperand(0);
3019 SDValue N1 = N->getOperand(1);
3020 EVT VT = N0.getValueType();
3021 SDLoc DL(N);
3022
3023 // fold vector ops
3024 if (VT.isVector()) {
3025 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3026 return FoldedVOp;
3027
3028 // fold (sub x, 0) -> x, vector edition
3030 return N0;
3031 }
3032
3033 // fold (sub x, x) -> 0
3034 // FIXME: Refactor this and xor and other similar operations together.
3035 if (N0 == N1)
3036 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3039 // fold (sub c1, c2) -> c1-c2
3040 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3041 N1.getNode());
3042 }
3043
3044 if (SDValue NewSel = foldBinOpIntoSelect(N))
3045 return NewSel;
3046
3048
3049 // fold (sub x, c) -> (add x, -c)
3050 if (N1C) {
3051 return DAG.getNode(ISD::ADD, DL, VT, N0,
3052 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3053 }
3054
3055 if (isNullOrNullSplat(N0)) {
3056 unsigned BitWidth = VT.getScalarSizeInBits();
3057 // Right-shifting everything out but the sign bit followed by negation is
3058 // the same as flipping arithmetic/logical shift type without the negation:
3059 // -(X >>u 31) -> (X >>s 31)
3060 // -(X >>s 31) -> (X >>u 31)
3061 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3063 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3064 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3065 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3066 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3067 }
3068 }
3069
3070 // 0 - X --> 0 if the sub is NUW.
3071 if (N->getFlags().hasNoUnsignedWrap())
3072 return N0;
3073
3074 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3075 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3076 // N1 must be 0 because negating the minimum signed value is undefined.
3077 if (N->getFlags().hasNoSignedWrap())
3078 return N0;
3079
3080 // 0 - X --> X if X is 0 or the minimum signed value.
3081 return N1;
3082 }
3083 }
3084
3085 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3087 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3088
3089 // fold (A - (0-B)) -> A+B
3090 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3091 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3092
3093 // fold A-(A-B) -> B
3094 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3095 return N1.getOperand(1);
3096
3097 // fold (A+B)-A -> B
3098 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3099 return N0.getOperand(1);
3100
3101 // fold (A+B)-B -> A
3102 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3103 return N0.getOperand(0);
3104
3105 // fold (A+C1)-C2 -> A+(C1-C2)
3106 if (N0.getOpcode() == ISD::ADD &&
3107 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3108 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3110 ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3111 assert(NewC && "Constant folding failed");
3112 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3113 }
3114
3115 // fold C2-(A+C1) -> (C2-C1)-A
3116 if (N1.getOpcode() == ISD::ADD) {
3117 SDValue N11 = N1.getOperand(1);
3118 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3119 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3120 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3121 N11.getNode());
3122 assert(NewC && "Constant folding failed");
3123 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3124 }
3125 }
3126
3127 // fold (A-C1)-C2 -> A-(C1+C2)
3128 if (N0.getOpcode() == ISD::SUB &&
3129 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3130 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3132 ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3133 assert(NewC && "Constant folding failed");
3134 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3135 }
3136
3137 // fold (c1-A)-c2 -> (c1-c2)-A
3138 if (N0.getOpcode() == ISD::SUB &&
3139 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3140 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3142 ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
3143 assert(NewC && "Constant folding failed");
3144 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3145 }
3146
3147 // fold ((A+(B+or-C))-B) -> A+or-C
3148 if (N0.getOpcode() == ISD::ADD &&
3149 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3150 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3151 N0.getOperand(1).getOperand(0) == N1)
3152 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3153 N0.getOperand(1).getOperand(1));
3154
3155 // fold ((A+(C+B))-B) -> A+C
3156 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3157 N0.getOperand(1).getOperand(1) == N1)
3158 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3159 N0.getOperand(1).getOperand(0));
3160
3161 // fold ((A-(B-C))-C) -> A-B
3162 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3163 N0.getOperand(1).getOperand(1) == N1)
3164 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3165 N0.getOperand(1).getOperand(0));
3166
3167 // fold (A-(B-C)) -> A+(C-B)
3168 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3169 return DAG.getNode(ISD::ADD, DL, VT, N0,
3170 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3171 N1.getOperand(0)));
3172
3173 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3174 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3175 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3177 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3178 N1.getOperand(0).getOperand(1),
3179 N1.getOperand(1));
3180 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3181 }
3182 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3184 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3185 N1.getOperand(0),
3186 N1.getOperand(1).getOperand(1));
3187 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3188 }
3189 }
3190
3191 // If either operand of a sub is undef, the result is undef
3192 if (N0.isUndef())
3193 return N0;
3194 if (N1.isUndef())
3195 return N1;
3196
3197 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3198 return V;
3199
3200 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3201 return V;
3202
3203 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3204 return V;
3205
3206 // (x - y) - 1 -> add (xor y, -1), x
3207 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3208 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3209 DAG.getAllOnesConstant(DL, VT));
3210 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3211 }
3212
3213 // Look for:
3214 // sub y, (xor x, -1)
3215 // And if the target does not like this form then turn into:
3216 // add (add x, y), 1
3217 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3218 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3219 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3220 }
3221
3222 // Hoist one-use addition by non-opaque constant:
3223 // (x + C) - y -> (x - y) + C
3224 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3225 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3226 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3227 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3228 }
3229 // y - (x + C) -> (y - x) - C
3230 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3231 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3232 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3233 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3234 }
3235 // (x - C) - y -> (x - y) - C
3236 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3237 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3238 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3239 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3240 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3241 }
3242 // (C - x) - y -> C - (x + y)
3243 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3244 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3245 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3246 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3247 }
3248
3249 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3250 // rather than 'sub 0/1' (the sext should get folded).
3251 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3252 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3253 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3254 TLI.getBooleanContents(VT) ==
3256 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3257 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3258 }
3259
3260 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3261 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3262 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3263 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3264 SDValue S0 = N1.getOperand(0);
3265 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3266 unsigned OpSizeInBits = VT.getScalarSizeInBits();
3268 if (C->getAPIntValue() == (OpSizeInBits - 1))
3269 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3270 }
3271 }
3272 }
3273
3274 // If the relocation model supports it, consider symbol offsets.
3275 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3276 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3277 // fold (sub Sym, c) -> Sym-c
3278 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3279 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3280 GA->getOffset() -
3281 (uint64_t)N1C->getSExtValue());
3282 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3283 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3284 if (GA->getGlobal() == GB->getGlobal())
3285 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3286 DL, VT);
3287 }
3288
3289 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3290 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3291 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3292 if (TN->getVT() == MVT::i1) {
3293 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3294 DAG.getConstant(1, DL, VT));
3295 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3296 }
3297 }
3298
3299 // Prefer an add for more folding potential and possibly better codegen:
3300 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3301 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3302 SDValue ShAmt = N1.getOperand(1);
3303 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3304 if (ShAmtC &&
3305 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3306 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3307 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3308 }
3309 }
3310
3311 return SDValue();
3312}
3313
3314SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3315 SDValue N0 = N->getOperand(0);
3316 SDValue N1 = N->getOperand(1);
3317 EVT VT = N0.getValueType();
3318 SDLoc DL(N);
3319
3320 // fold vector ops
3321 if (VT.isVector()) {
3322 // TODO SimplifyVBinOp
3323
3324 // fold (sub_sat x, 0) -> x, vector edition
3326 return N0;
3327 }
3328
3329 // fold (sub_sat x, undef) -> 0
3330 if (N0.isUndef() || N1.isUndef())
3331 return DAG.getConstant(0, DL, VT);
3332
3333 // fold (sub_sat x, x) -> 0
3334 if (N0 == N1)
3335 return DAG.getConstant(0, DL, VT);
3336
3339 // fold (sub_sat c1, c2) -> c3
3340 return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3341 N1.getNode());
3342 }
3343
3344 // fold (sub_sat x, 0) -> x
3345 if (isNullConstant(N1))
3346 return N0;
3347
3348 return SDValue();
3349}
3350
3351SDValue DAGCombiner::visitSUBC(SDNode *N) {
3352 SDValue N0 = N->getOperand(0);
3353 SDValue N1 = N->getOperand(1);
3354 EVT VT = N0.getValueType();
3355 SDLoc DL(N);
3356
3357 // If the flag result is dead, turn this into an SUB.
3358 if (!N->hasAnyUseOfValue(1))
3359 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3361
3362 // fold (subc x, x) -> 0 + no borrow
3363 if (N0 == N1)
3364 return CombineTo(N, DAG.getConstant(0, DL, VT),
3366
3367 // fold (subc x, 0) -> x + no borrow
3368 if (isNullConstant(N1))
3369 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3370
3371 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3372 if (isAllOnesConstant(N0))
3373 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3375
3376 return SDValue();
3377}
3378
3379SDValue DAGCombiner::visitSUBO(SDNode *N) {
3380 SDValue N0 = N->getOperand(0);
3381 SDValue N1 = N->getOperand(1);
3382 EVT VT = N0.getValueType();
3383 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3384
3385 EVT CarryVT = N->getValueType(1);
3386 SDLoc DL(N);
3387
3388 // If the flag result is dead, turn this into an SUB.
3389 if (!N->hasAnyUseOfValue(1))
3390 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3391 DAG.getUNDEF(CarryVT));
3392
3393 // fold (subo x, x) -> 0 + no borrow
3394 if (N0 == N1)
3395 return CombineTo(N, DAG.getConstant(0, DL, VT),
3396 DAG.getConstant(0, DL, CarryVT));
3397
3399
3400 // fold (subox, c) -> (addo x, -c)
3401 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3402 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3403 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3404 }
3405
3406 // fold (subo x, 0) -> x + no borrow
3407 if (isNullOrNullSplat(N1))
3408 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3409
3410 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3411 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3412 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3413 DAG.getConstant(0, DL, CarryVT));
3414
3415 return SDValue();
3416}
3417
3418SDValue DAGCombiner::visitSUBE(SDNode *N) {
3419 SDValue N0 = N->getOperand(0);
3420 SDValue N1 = N->getOperand(1);
3421 SDValue CarryIn = N->getOperand(2);
3422
3423 // fold (sube x, y, false) -> (subc x, y)
3424 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3425 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3426
3427 return SDValue();
3428}
3429
3430SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3431 SDValue N0 = N->getOperand(0);
3432 SDValue N1 = N->getOperand(1);
3433 SDValue CarryIn = N->getOperand(2);
3434
3435 // fold (subcarry x, y, false) -> (usubo x, y)
3436 if (isNullConstant(CarryIn)) {
3437 if (!LegalOperations ||
3438 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3439 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3440 }
3441
3442 return SDValue();
3443}
3444
3445SDValue DAGCombiner::visitMUL(SDNode *N) {
3446 SDValue N0 = N->getOperand(0);
3447 SDValue N1 = N->getOperand(1);
3448 EVT VT = N0.getValueType();
3449
3450 // fold (mul x, undef) -> 0
3451 if (N0.isUndef() || N1.isUndef())
3452 return DAG.getConstant(0, SDLoc(N), VT);
3453
3454 bool N0IsConst = false;
3455 bool N1IsConst = false;
3456 bool N1IsOpaqueConst = false;
3457 bool N0IsOpaqueConst = false;
3458 APInt ConstValue0, ConstValue1;
3459 // fold vector ops
3460 if (VT.isVector()) {
3461 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3462 return FoldedVOp;
3463
3464 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3465 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3466 assert((!N0IsConst ||
3467 ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3468 "Splat APInt should be element width");
3469 assert((!N1IsConst ||
3470 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3471 "Splat APInt should be element width");
3472 } else {
3473 N0IsConst = isa<ConstantSDNode>(N0);
3474 if (N0IsConst) {
3475 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3476 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3477 }
3478 N1IsConst = isa<ConstantSDNode>(N1);
3479 if (N1IsConst) {
3480 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3481 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3482 }
3483 }
3484
3485 // fold (mul c1, c2) -> c1*c2
3486 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3487 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3488 N0.getNode(), N1.getNode());
3489
3490 // canonicalize constant to RHS (vector doesn't have to splat)
3493 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3494 // fold (mul x, 0) -> 0
3495 if (N1IsConst && ConstValue1.isNullValue())
3496 return N1;
3497 // fold (mul x, 1) -> x
3498 if (N1IsConst && ConstValue1.isOneValue())
3499 return N0;
3500
3501 if (SDValue NewSel = foldBinOpIntoSelect(N))
3502 return NewSel;
3503
3504 // fold (mul x, -1) -> 0-x
3505 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3506 SDLoc DL(N);
3507 return DAG.getNode(ISD::SUB, DL, VT,
3508 DAG.getConstant(0, DL, VT), N0);
3509 }
3510 // fold (mul x, (1 << c)) -> x << c
3511 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3512 DAG.isKnownToBeAPowerOfTwo(N1) &&
3513 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3514 SDLoc DL(N);
3515 SDValue LogBase2 = BuildLogBase2(N1, DL);
3516 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3517 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3518 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3519 }
3520 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3521 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3522 unsigned Log2Val = (-ConstValue1).logBase2();
3523 SDLoc DL(N);
3524 // FIXME: If the input is something that is easily negated (e.g. a
3525 // single-use add), we should put the negate there.
3526 return DAG.getNode(ISD::SUB, DL, VT,
3527 DAG.getConstant(0, DL, VT),
3528 DAG.getNode(ISD::SHL, DL, VT, N0,
3529 DAG.getConstant(Log2Val, DL,
3531 }
3532
3533 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3534 // mul x, (2^N + 1) --> add (shl x, N), x
3535 // mul x, (2^N - 1) --> sub (shl x, N), x
3536 // Examples: x * 33 --> (x << 5) + x
3537 // x * 15 --> (x << 4) - x
3538 // x * -33 --> -((x << 5) + x)
3539 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3540 if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3541 // TODO: We could handle more general decomposition of any constant by
3542 // having the target set a limit on number of ops and making a
3543 // callback to determine that sequence (similar to sqrt expansion).
3544 unsigned MathOp = ISD::DELETED_NODE;
3545 APInt MulC = ConstValue1.abs();
3546 if ((MulC - 1).isPowerOf2())
3547 MathOp = ISD::ADD;
3548 else if ((MulC + 1).isPowerOf2())
3549 MathOp = ISD::SUB;
3550
3551 if (MathOp != ISD::DELETED_NODE) {
3552 unsigned ShAmt =
3553 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3554 assert(ShAmt < VT.getScalarSizeInBits() &&
3555 "multiply-by-constant generated out of bounds shift");
3556 SDLoc DL(N);
3557 SDValue Shl =
3558 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3559 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3560 if (ConstValue1.isNegative())
3561 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3562 return R;
3563 }
3564 }
3565
3566 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3567 if (N0.getOpcode() == ISD::SHL &&
3568 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3569 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3570 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3572 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3573 }
3574
3575 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3576 // use.
3577 {
3578 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3579
3580 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3581 if (N0.getOpcode() == ISD::SHL &&
3583 N0.getNode()->hasOneUse()) {
3584 Sh = N0; Y = N1;
3585 } else if (N1.getOpcode() == ISD::SHL &&
3587 N1.getNode()->hasOneUse()) {
3588 Sh = N1; Y = N0;
3589 }
3590
3591 if (Sh.getNode()) {
3592 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3593 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3594 }
3595 }
3596
3597 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3599 N0.getOpcode() == ISD::ADD &&
3601 isMulAddWithConstProfitable(N, N0, N1))
3602 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3603 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3604 N0.getOperand(0), N1),
3605 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3606 N0.getOperand(1), N1));
3607
3608 // reassociate mul
3609 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3610 return RMUL;
3611
3612 return SDValue();
3613}
3614
3615/// Return true if divmod libcall is available.
3616static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3617 const TargetLowering &TLI) {
3618 RTLIB::Libcall LC;
3619 EVT NodeType = Node->getValueType(0);
3620 if (!NodeType.isSimple())
3621 return false;
3622 switch (NodeType.getSimpleVT().SimpleTy) {
3623 default: return false; // No libcall for vector types.
3624 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3625 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3626 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3627 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3628 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3629 }
3630
3631 return TLI.getLibcallName(LC) != nullptr;
3632}
3633
3634/// Issue divrem if both quotient and remainder are needed.
3635SDValue DAGCombiner::useDivRem(SDNode *Node) {
3636 if (Node->use_empty())
3637 return SDValue(); // This is a dead node, leave it alone.
3638
3639 unsigned Opcode = Node->getOpcode();
3640 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3641 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3642
3643 // DivMod lib calls can still work on non-legal types if using lib-calls.
3644 EVT VT = Node->getValueType(0);
3645 if (VT.isVector() || !VT.isInteger())
3646 return SDValue();
3647
3648 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3649 return SDValue();
3650
3651 // If DIVREM is going to get expanded into a libcall,
3652 // but there is no libcall available, then don't combine.
3653 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3654 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3655 return SDValue();
3656
3657 // If div is legal, it's better to do the normal expansion
3658 unsigned OtherOpcode = 0;
3659 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3660 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3661 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3662 return SDValue();
3663 } else {
3664 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3665 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3666 return SDValue();
3667 }
3668
3669 SDValue Op0 = Node->getOperand(0);
3670 SDValue Op1 = Node->getOperand(1);
3671 SDValue combined;
3672 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3673 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3674 SDNode *User = *UI;
3675 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3676 User->use_empty())
3677 continue;
3678 // Convert the other matching node(s), too;
3679 // otherwise, the DIVREM may get target-legalized into something
3680 // target-specific that we won't be able to recognize.
3681 unsigned UserOpc = User->getOpcode();
3682 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3683 User->getOperand(0) == Op0 &&
3684 User->getOperand(1) == Op1) {
3685 if (!combined) {
3686 if (UserOpc == OtherOpcode) {
3687 SDVTList VTs = DAG.getVTList(VT, VT);
3688 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3689 } else if (UserOpc == DivRemOpc) {
3690 combined = SDValue(User, 0);
3691 } else {
3692 assert(UserOpc == Opcode);
3693 continue;
3694 }
3695 }
3696 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3697 CombineTo(User, combined);
3698 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3699 CombineTo(User, combined.getValue(1));
3700 }
3701 }
3702 return combined;
3703}
3704
3706 SDValue N0 = N->getOperand(0);
3707 SDValue N1 = N->getOperand(1);
3708 EVT VT = N->getValueType(0);
3709 SDLoc DL(N);
3710
3711 unsigned Opc = N->getOpcode();
3712 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3714
3715 // X / undef -> undef
3716 // X % undef -> undef
3717 // X / 0 -> undef
3718 // X % 0 -> undef
3719 // NOTE: This includes vectors where any divisor element is zero/undef.
3720 if (DAG.isUndef(Opc, {N0, N1}))
3721 return DAG.getUNDEF(VT);
3722
3723 // undef / X -> 0
3724 // undef % X -> 0
3725 if (N0.isUndef())
3726 return DAG.getConstant(0, DL, VT);
3727
3728 // 0 / X -> 0
3729 // 0 % X -> 0
3731 if (N0C && N0C->isNullValue())
3732 return N0;
3733
3734 // X / X -> 1
3735 // X % X -> 0
3736 if (N0 == N1)
3737 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3738
3739 // X / 1 -> X
3740 // X % 1 -> 0
3741 // If this is a boolean op (single-bit element type), we can't have
3742 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3743 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3744 // it's a 1.
3745 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3746 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3747
3748 return SDValue();
3749}
3750
3751SDValue DAGCombiner::visitSDIV(SDNode *N) {
3752 SDValue N0 = N->getOperand(0);
3753 SDValue N1 = N->getOperand(1);
3754 EVT VT = N->getValueType(0);
3755 EVT CCVT = getSetCCResultType(VT);
3756
3757 // fold vector ops
3758 if (VT.isVector())
3759 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3760 return FoldedVOp;
3761
3762 SDLoc DL(N);
3763
3764 // fold (sdiv c1, c2) -> c1/c2
3767 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3768 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3769 // fold (sdiv X, -1) -> 0-X
3770 if (N1C && N1C->isAllOnesValue())
3771 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3772 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3773 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3774 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3775 DAG.getConstant(1, DL, VT),
3776 DAG.getConstant(0, DL, VT));
3777
3778 if (SDValue V = simplifyDivRem(N, DAG))
3779 return V;
3780
3781 if (SDValue NewSel = foldBinOpIntoSelect(N))
3782 return NewSel;
3783
3784 // If we know the sign bits of both operands are zero, strength reduce to a
3785 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3786 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3787 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3788
3789 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3790 // If the corresponding remainder node exists, update its users with
3791 // (Dividend - (Quotient * Divisor).
3792 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3793 { N0, N1 })) {
3794 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3795 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3796 AddToWorklist(Mul.getNode());
3797 AddToWorklist(Sub.getNode());
3798 CombineTo(RemNode, Sub);
3799 }
3800 return V;
3801 }
3802
3803 // sdiv, srem -> sdivrem
3804 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3805 // true. Otherwise, we break the simplification logic in visitREM().
3806 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3807 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3808 if (SDValue DivRem = useDivRem(N))
3809 return DivRem;
3810
3811 return SDValue();
3812}
3813
3814SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3815 SDLoc DL(N);
3816 EVT VT = N->getValueType(0);
3817 EVT CCVT = getSetCCResultType(VT);
3818 unsigned BitWidth = VT.getScalarSizeInBits();
3819
3820 // Helper for determining whether a value is a power-2 constant scalar or a
3821 // vector of such elements.
3822 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3823 if (C->isNullValue() || C->isOpaque())
3824 return false;
3825 if (C->getAPIntValue().isPowerOf2())
3826 return true;
3827 if ((-C->getAPIntValue()).isPowerOf2())
3828 return true;
3829 return false;
3830 };
3831
3832 // fold (sdiv X, pow2) -> simple ops after legalize
3833 // FIXME: We check for the exact bit here because the generic lowering gives
3834 // better results in that case. The target-specific lowering should learn how
3835 // to handle exact sdivs efficiently.
3836 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3837 // Target-specific implementation of sdiv x, pow2.
3838 if (SDValue Res = BuildSDIVPow2(N))
3839 return Res;
3840
3841 // Create constants that are functions of the shift amount value.
3842 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3843 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3844 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3845 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3846 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3847 if (!isConstantOrConstantVector(Inexact))
3848 return SDValue();
3849
3850 // Splat the sign bit into the register
3851 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3852 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3853 AddToWorklist(Sign.getNode());
3854
3855 // Add (N0 < 0) ? abs2 - 1 : 0;
3856 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3857 AddToWorklist(Srl.getNode());
3858 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3859 AddToWorklist(Add.getNode());
3860 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3861 AddToWorklist(Sra.getNode());
3862
3863 // Special case: (sdiv X, 1) -> X
3864 // Special Case: (sdiv X, -1) -> 0-X
3865 SDValue One = DAG.getConstant(1, DL, VT);
3866 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3867 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3868 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3869 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3870 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3871
3872 // If dividing by a positive value, we're done. Otherwise, the result must
3873 // be negated.
3874 SDValue Zero = DAG.getConstant(0, DL, VT);
3875 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3876
3877 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3878 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3879 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3880 return Res;
3881 }
3882
3883 // If integer divide is expensive and we satisfy the requirements, emit an
3884 // alternate sequence. Targets may check function attributes for size/speed
3885 // trade-offs.
3886 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3888 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3889 if (SDValue Op = BuildSDIV(N))
3890 return Op;
3891
3892 return SDValue();
3893}
3894
3895SDValue DAGCombiner::visitUDIV(SDNode *N) {
3896 SDValue N0 = N->getOperand(0);
3897 SDValue N1 = N->getOperand(1);
3898 EVT VT = N->getValueType(0);
3899 EVT CCVT = getSetCCResultType(VT);
3900
3901 // fold vector ops
3902 if (VT.isVector())
3903 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3904 return FoldedVOp;
3905
3906 SDLoc DL(N);
3907
3908 // fold (udiv c1, c2) -> c1/c2
3911 if (N0C && N1C)
3912 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3913 N0C, N1C))
3914 return Folded;
3915 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3916 if (N1C && N1C->getAPIntValue().isAllOnesValue())
3917 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3918 DAG.getConstant(1, DL, VT),
3919 DAG.getConstant(0, DL, VT));
3920
3921 if (SDValue V = simplifyDivRem(N, DAG))
3922 return V;
3923
3924 if (SDValue NewSel = foldBinOpIntoSelect(N))
3925 return NewSel;
3926
3927 if (SDValue V = visitUDIVLike(N0, N1, N)) {
3928 // If the corresponding remainder node exists, update its users with
3929 // (Dividend - (Quotient * Divisor).
3930 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3931 { N0, N1 })) {
3932 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3933 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3934 AddToWorklist(Mul.getNode());
3935 AddToWorklist(Sub.getNode());
3936 CombineTo(RemNode, Sub);
3937 }
3938 return V;
3939 }
3940
3941 // sdiv, srem -> sdivrem
3942 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3943 // true. Otherwise, we break the simplification logic in visitREM().
3944 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3945 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3946 if (SDValue DivRem = useDivRem(N))
3947 return DivRem;
3948
3949 return SDValue();
3950}
3951
3952SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3953 SDLoc DL(N);
3954 EVT VT = N->getValueType(0);
3955
3956 // fold (udiv x, (1 << c)) -> x >>u c
3957 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3958 DAG.isKnownToBeAPowerOfTwo(N1)) {
3959 SDValue LogBase2 = BuildLogBase2(N1, DL);
3960 AddToWorklist(LogBase2.getNode());
3961
3962 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3963 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3964 AddToWorklist(Trunc.getNode());
3965 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3966 }
3967
3968 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3969 if (N1.getOpcode() == ISD::SHL) {
3970 SDValue N10 = N1.getOperand(0);
3971 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3972 DAG.isKnownToBeAPowerOfTwo(N10)) {
3973 SDValue LogBase2 = BuildLogBase2(N10, DL);
3974 AddToWorklist(LogBase2.getNode());
3975
3976 EVT ADDVT = N1.getOperand(1).getValueType();
3977 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3978 AddToWorklist(Trunc.getNode());
3979 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3980 AddToWorklist(Add.getNode());
3981 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3982 }
3983 }
3984
3985 // fold (udiv x, c) -> alternate
3986 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3988 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3989 if (SDValue Op = BuildUDIV(N))
3990 return Op;
3991
3992 return SDValue();
3993}
3994
3995// handles ISD::SREM and ISD::UREM
3996SDValue DAGCombiner::visitREM(SDNode *N) {
3997 unsigned Opcode = N->getOpcode();
3998 SDValue N0 = N->getOperand(0);
3999 SDValue N1 = N->getOperand(1);
4000 EVT VT = N->getValueType(0);
4001 EVT CCVT = getSetCCResultType(VT);
4002
4003 bool isSigned = (Opcode == ISD::SREM);
4004 SDLoc DL(N);
4005
4006 // fold (rem c1, c2) -> c1%c2
4009 if (N0C && N1C)
4010 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
4011 return Folded;
4012 // fold (urem X, -1) -> select(X == -1, 0, x)
4013 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4014 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4015 DAG.getConstant(0, DL, VT), N0);
4016
4017 if (SDValue V = simplifyDivRem(N, DAG))
4018 return V;
4019
4020 if (SDValue NewSel = foldBinOpIntoSelect(N))
4021 return NewSel;
4022
4023 if (isSigned) {
4024 // If we know the sign bits of both operands are zero, strength reduce to a
4025 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4026 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4027 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4028 } else {
4029 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4030 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4031 // fold (urem x, pow2) -> (and x, pow2-1)
4032 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4033 AddToWorklist(Add.getNode());
4034 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4035 }
4036 if (N1.getOpcode() == ISD::SHL &&
4038 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4039 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4040 AddToWorklist(Add.getNode());
4041 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4042 }
4043 }
4044
4045 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4046
4047 // If X/C can be simplified by the division-by-constant logic, lower
4048 // X%C to the equivalent of X-X/C*C.
4049 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4050 // speculative DIV must not cause a DIVREM conversion. We guard against this
4051 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4052 // combine will not return a DIVREM. Regardless, checking cheapness here
4053 // makes sense since the simplification results in fatter code.
4054 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4055 SDValue OptimizedDiv =
4056 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4057 if (OptimizedDiv.getNode()) {
4058 // If the equivalent Div node also exists, update its users.
4059 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4060 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4061 { N0, N1 }))
4062 CombineTo(DivNode, OptimizedDiv);
4063 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4064 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4065 AddToWorklist(OptimizedDiv.getNode());
4066 AddToWorklist(Mul.getNode());
4067 return Sub;
4068 }
4069 }
4070
4071 // sdiv, srem -> sdivrem
4072 if (SDValue DivRem = useDivRem(N))
4073 return DivRem.getValue(1);
4074
4075 return SDValue();
4076}
4077
4078SDValue DAGCombiner::visitMULHS(SDNode *N) {
4079 SDValue N0 = N->getOperand(0);
4080 SDValue N1 = N->getOperand(1);
4081 EVT VT = N->getValueType(0);
4082 SDLoc DL(N);
4083
4084 if (VT.isVector()) {
4085 // fold (mulhs x, 0) -> 0
4087 return N1;
4089 return N0;
4090 }
4091
4092 // fold (mulhs x, 0) -> 0
4093 if (isNullConstant(N1))
4094 return N1;
4095 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4096 if (isOneConstant(N1))
4097 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4098 DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
4100
4101 // fold (mulhs x, undef) -> 0
4102 if (N0.isUndef() || N1.isUndef())
4103 return DAG.getConstant(0, DL, VT);
4104
4105 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4106 // plus a shift.
4107 if (VT.isSimple() && !VT.isVector()) {
4108 MVT Simple = VT.getSimpleVT();
4109 unsigned SimpleSize = Simple.getSizeInBits();
4110 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4111 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4112 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4113 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4114 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4115 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4116 DAG.getConstant(SimpleSize, DL,
4118 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4119 }
4120 }
4121
4122 return SDValue();
4123}
4124
4125SDValue DAGCombiner::visitMULHU(SDNode *N) {
4126 SDValue N0 = N->getOperand(0);
4127 SDValue N1 = N->getOperand(1);
4128 EVT VT = N->getValueType(0);
4129 SDLoc DL(N);
4130
4131 if (VT.isVector()) {
4132 // fold (mulhu x, 0) -> 0
4134 return N1;
4136 return N0;
4137 }
4138
4139 // fold (mulhu x, 0) -> 0
4140 if (isNullConstant(N1))
4141 return N1;
4142 // fold (mulhu x, 1) -> 0
4143 if (isOneConstant(N1))
4144 return DAG.getConstant(0, DL, N0.getValueType());
4145 // fold (mulhu x, undef) -> 0
4146 if (N0.isUndef() || N1.isUndef())
4147 return DAG.getConstant(0, DL, VT);
4148
4149 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4150 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4151 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4152 unsigned NumEltBits = VT.getScalarSizeInBits();
4153 SDValue LogBase2 = BuildLogBase2(N1, DL);
4154 SDValue SRLAmt = DAG.getNode(
4155 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4156 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4157 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4158 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4159 }
4160
4161 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4162 // plus a shift.
4163 if (VT.isSimple() && !VT.isVector()) {
4164 MVT Simple = VT.getSimpleVT();
4165 unsigned SimpleSize = Simple.getSizeInBits();
4166 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4167 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4168 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4169 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4170 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4171 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4172 DAG.getConstant(SimpleSize, DL,
4174 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4175 }
4176 }
4177
4178 return SDValue();
4179}
4180
4181/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4182/// give the opcodes for the two computations that are being performed. Return
4183/// true if a simplification was made.
4184SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4185 unsigned HiOp) {
4186 // If the high half is not needed, just compute the low half.
4187 bool HiExists = N->hasAnyUseOfValue(1);
4188 if (!HiExists && (!LegalOperations ||
4189 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4190 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4191 return CombineTo(N, Res, Res);
4192 }
4193
4194 // If the low half is not needed, just compute the high half.
4195 bool LoExists = N->hasAnyUseOfValue(0);
4196 if (!LoExists && (!LegalOperations ||
4197 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4198 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4199 return CombineTo(N, Res, Res);
4200 }
4201
4202 // If both halves are used, return as it is.
4203 if (LoExists && HiExists)
4204 return SDValue();
4205
4206 // If the two computed results can be simplified separately, separate them.
4207 if (LoExists) {
4208 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4209 AddToWorklist(Lo.getNode());
4210 SDValue LoOpt = combine(Lo.getNode());
4211 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4212 (!LegalOperations ||
4213 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4214 return CombineTo(N, LoOpt, LoOpt);
4215 }
4216
4217 if (HiExists) {
4218 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4219 AddToWorklist(Hi.getNode());
4220 SDValue HiOpt = combine(Hi.getNode());
4221 if (HiOpt.getNode() && HiOpt != Hi &&
4222 (!LegalOperations ||
4223 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4224 return CombineTo(N, HiOpt, HiOpt);
4225 }
4226
4227 return SDValue();
4228}
4229
4230SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4231 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4232 return Res;
4233
4234 EVT VT = N->getValueType(0);
4235 SDLoc DL(N);
4236
4237 // If the type is twice as wide is legal, transform the mulhu to a wider
4238 // multiply plus a shift.
4239 if (VT.isSimple() && !VT.isVector()) {
4240 MVT Simple = VT.getSimpleVT();
4241 unsigned SimpleSize = Simple.getSizeInBits();
4242 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4243 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4244 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4245 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4246 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4247 // Compute the high part as N1.
4248 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4249 DAG.getConstant(SimpleSize, DL,
4250 getShiftAmountTy(Lo.getValueType())));
4251 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4252 // Compute the low part as N0.
4253 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4254 return CombineTo(N, Lo, Hi);
4255 }
4256 }
4257
4258 return SDValue();
4259}
4260
4261SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4262 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4263 return Res;
4264
4265 EVT VT = N->getValueType(0);
4266 SDLoc DL(N);
4267
4268 // If the type is twice as wide is legal, transform the mulhu to a wider
4269 // multiply plus a shift.
4270 if (VT.isSimple() && !VT.isVector()) {
4271 MVT Simple = VT.getSimpleVT();
4272 unsigned SimpleSize = Simple.getSizeInBits();
4273 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4274 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4275 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4276 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4277 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4278 // Compute the high part as N1.
4279 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4280 DAG.getConstant(SimpleSize, DL,
4281 getShiftAmountTy(Lo.getValueType())));
4282 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4283 // Compute the low part as N0.
4284 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4285 return CombineTo(N, Lo, Hi);
4286 }
4287 }
4288
4289 return SDValue();
4290}
4291
4292SDValue DAGCombiner::visitMULO(SDNode *N) {
4293 bool IsSigned = (ISD::SMULO == N->getOpcode());
4294
4295 // (mulo x, 2) -> (addo x, x)
4296 if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
4297 if (C2->getAPIntValue() == 2)
4298 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
4299 N->getVTList(), N->getOperand(0), N->getOperand(0));
4300
4301 return SDValue();
4302}
4303
4304SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4305 SDValue N0 = N->getOperand(0);
4306 SDValue N1 = N->getOperand(1);
4307 EVT VT = N0.getValueType();
4308
4309 // fold vector ops
4310 if (VT.isVector())
4311 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4312 return FoldedVOp;
4313
4314 // fold operation with constant operands.
4317 if (N0C && N1C)
4318 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4319
4320 // canonicalize constant to RHS
4323 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4324
4325 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4326 // Only do this if the current op isn't legal and the flipped is.
4327 unsigned Opcode = N->getOpcode();
4328 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4329 if (!TLI.isOperationLegal(Opcode, VT) &&
4330 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4331 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4332 unsigned AltOpcode;
4333 switch (Opcode) {
4334 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4335 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4336 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4337 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4338 default: llvm_unreachable("Unknown MINMAX opcode");
4339 }
4340 if (TLI.isOperationLegal(AltOpcode, VT))
4341 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4342 }
4343
4344 return SDValue();
4345}
4346
4347/// If this is a bitwise logic instruction and both operands have the same
4348/// opcode, try to sink the other opcode after the logic instruction.
4349SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4350 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4351 EVT VT = N0.getValueType();
4352 unsigned LogicOpcode = N->getOpcode();
4353 unsigned HandOpcode = N0.getOpcode();
4354 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4355 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4356 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4357
4358 // Bail early if none of these transforms apply.
4359 if (N0.getNumOperands() == 0)
4360 return SDValue();
4361
4362 // FIXME: We should check number of uses of the operands to not increase
4363 // the instruction count for all transforms.
4364
4365 // Handle size-changing casts.
4366 SDValue X = N0.getOperand(0);
4367 SDValue Y = N1.getOperand(0);
4368 EVT XVT = X.getValueType();
4369 SDLoc DL(N);
4370 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4371 HandOpcode == ISD::SIGN_EXTEND) {
4372 // If both operands have other uses, this transform would create extra
4373 // instructions without eliminating anything.
4374 if (!N0.hasOneUse() && !N1.hasOneUse())
4375 return SDValue();
4376 // We need matching integer source types.
4377 if (XVT != Y.getValueType())
4378 return SDValue();
4379 // Don't create an illegal op during or after legalization. Don't ever
4380 // create an unsupported vector op.
4381 if ((VT.isVector() || LegalOperations) &&
4382 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4383 return SDValue();
4384 // Avoid infinite looping with PromoteIntBinOp.
4385 // TODO: Should we apply desirable/legal constraints to all opcodes?
4386 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4387 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4388 return SDValue();
4389 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4390 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4391 return DAG.getNode(HandOpcode, DL, VT, Logic);
4392 }
4393
4394 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4395 if (HandOpcode == ISD::TRUNCATE) {
4396 // If both operands have other uses, this transform would create extra
4397 // instructions without eliminating anything.
4398 if (!N0.hasOneUse() && !N1.hasOneUse())
4399 return SDValue();
4400 // We need matching source types.
4401 if (XVT != Y.getValueType())
4402 return SDValue();
4403 // Don't create an illegal op during or after legalization.
4404 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4405 return SDValue();
4406 // Be extra careful sinking truncate. If it's free, there's no benefit in
4407 // widening a binop. Also, don't create a logic op on an illegal type.
4408 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4409 return SDValue();
4410 if (!TLI.isTypeLegal(XVT))
4411 return SDValue();
4412 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4413 return DAG.getNode(HandOpcode, DL, VT, Logic);
4414 }
4415
4416 // For binops SHL/SRL/SRA/AND:
4417 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4418 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4419 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4420 N0.getOperand(1) == N1.getOperand(1)) {
4421 // If either operand has other uses, this transform is not an improvement.
4422 if (!N0.hasOneUse() || !N1.hasOneUse())
4423 return SDValue();
4424 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4425 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4426 }
4427
4428 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4429 if (HandOpcode == ISD::BSWAP) {
4430 // If either operand has other uses, this transform is not an improvement.
4431 if (!N0.hasOneUse() || !N1.hasOneUse())
4432 return SDValue();
4433 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4434 return DAG.getNode(HandOpcode, DL, VT, Logic);
4435 }
4436
4437 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4438 // Only perform this optimization up until type legalization, before
4439 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4440 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4441 // we don't want to undo this promotion.
4442 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4443 // on scalars.
4444 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4445 Level <= AfterLegalizeTypes) {
4446 // Input types must be integer and the same.
4447 if (XVT.isInteger() && XVT == Y.getValueType()) {
4448 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4449 return DAG.getNode(HandOpcode, DL, VT, Logic);
4450 }
4451 }
4452
4453 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4454 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4455 // If both shuffles use the same mask, and both shuffle within a single
4456 // vector, then it is worthwhile to move the swizzle after the operation.
4457 // The type-legalizer generates this pattern when loading illegal
4458 // vector types from memory. In many cases this allows additional shuffle
4459 // optimizations.
4460 // There are other cases where moving the shuffle after the xor/and/or
4461 // is profitable even if shuffles don't perform a swizzle.
4462 // If both shuffles use the same mask, and both shuffles have the same first
4463 // or second operand, then it might still be profitable to move the shuffle
4464 // after the xor/and/or operation.
4465 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4466 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4467 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4468 assert(X.getValueType() == Y.getValueType() &&
4469 "Inputs to shuffles are not the same type");
4470
4471 // Check that both shuffles use the same mask. The masks are known to be of
4472 // the same length because the result vector type is the same.
4473 // Check also that shuffles have only one use to avoid introducing extra
4474 // instructions.
4475 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4476 !SVN0->getMask().equals(SVN1->getMask()))
4477 return SDValue();
4478
4479 // Don't try to fold this node if it requires introducing a
4480 // build vector of all zeros that might be illegal at this stage.
4481 SDValue ShOp = N0.getOperand(1);
4482 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4483 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4484
4485 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4486 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4487 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4488 N0.getOperand(0), N1.getOperand(0));
4489 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4490 }
4491
4492 // Don't try to fold this node if it requires introducing a
4493 // build vector of all zeros that might be illegal at this stage.
4494 ShOp = N0.getOperand(0);
4495 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4496 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4497
4498 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4499 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4500 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4501 N1.getOperand(1));
4502 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4503 }
4504 }
4505
4506 return SDValue();
4507}
4508
4509/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4510SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4511 const SDLoc &DL) {
4512 SDValue LL, LR, RL, RR, N0CC, N1CC;
4513 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4514 !isSetCCEquivalent(N1, RL, RR, N1CC))
4515 return SDValue();
4516
4517 assert(N0.getValueType() == N1.getValueType() &&
4518 "Unexpected operand types for bitwise logic op");
4519 assert(LL.getValueType() == LR.getValueType() &&
4520 RL.getValueType() == RR.getValueType() &&
4521 "Unexpected operand types for setcc");
4522
4523 // If we're here post-legalization or the logic op type is not i1, the logic
4524 // op type must match a setcc result type. Also, all folds require new
4525 // operations on the left and right operands, so those types must match.
4526 EVT VT = N0.getValueType();
4527 EVT OpVT = LL.getValueType();
4528 if (LegalOperations || VT.getScalarType() != MVT::i1)
4529 if (VT != getSetCCResultType(OpVT))
4530 return SDValue();
4531 if (OpVT != RL.getValueType())
4532 return SDValue();
4533
4534 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4535 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4536 bool IsInteger = OpVT.isInteger();
4537 if (LR == RR && CC0 == CC1 && IsInteger) {
4538 bool IsZero = isNullOrNullSplat(LR);
4539 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4540
4541 // All bits clear?
4542 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4543 // All sign bits clear?
4544 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4545 // Any bits set?
4546 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4547 // Any sign bits set?
4548 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4549
4550 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4551 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4552 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4553 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4554 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4555 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4556 AddToWorklist(Or.getNode());
4557 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4558 }
4559
4560 // All bits set?
4561 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4562 // All sign bits set?
4563 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4564 // Any bits clear?
4565 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4566 // Any sign bits clear?
4567 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4568
4569 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4570 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4571 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4572 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4573 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4574 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4575 AddToWorklist(And.getNode());
4576 return DAG.getSetCC(DL, VT, And, LR, CC1);
4577 }
4578 }
4579
4580 // TODO: What is the 'or' equivalent of this fold?
4581 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4582 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4583 IsInteger && CC0 == ISD::SETNE &&
4584 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4585 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4586 SDValue One = DAG.getConstant(1, DL, OpVT);
4587 SDValue Two = DAG.getConstant(2, DL, OpVT);
4588 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4589 AddToWorklist(Add.getNode());
4590 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4591 }
4592
4593 // Try more general transforms if the predicates match and the only user of
4594 // the compares is the 'and' or 'or'.
4595 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4596 N0.hasOneUse() && N1.hasOneUse()) {
4597 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4598 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4599 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4600 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4601 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4602 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4603 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4604 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4605 }
4606
4607 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4608 // TODO - support non-uniform vector amounts.
4609 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4610 // Match a shared variable operand and 2 non-opaque constant operands.
4613 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4614 // Canonicalize larger constant as C0.
4615 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4616 std::swap(C0, C1);
4617
4618 // The difference of the constants must be a single bit.
4619 const APInt &C0Val = C0->getAPIntValue();
4620 const APInt &C1Val = C1->getAPIntValue();
4621 if ((C0Val - C1Val).isPowerOf2()) {
4622 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4623 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4624 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4625 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4626 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4627 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4628 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4629 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4630 }
4631 }
4632 }
4633 }
4634
4635 // Canonicalize equivalent operands to LL == RL.
4636 if (LL == RR && LR == RL) {
4638 std::swap(RL, RR);
4639 }
4640
4641 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4642 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4643 if (LL == RL && LR == RR) {
4644 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4645 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4646 if (NewCC != ISD::SETCC_INVALID &&
4647 (!LegalOperations ||
4648 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4649 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4650 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4651 }
4652
4653 return SDValue();
4654}
4655
4656/// This contains all DAGCombine rules which reduce two values combined by
4657/// an And operation to a single value. This makes them reusable in the context
4658/// of visitSELECT(). Rules involving constants are not included as
4659/// visitSELECT() already handles those cases.
4660SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4661 EVT VT = N1.getValueType();
4662 SDLoc DL(N);
4663
4664 // fold (and x, undef) -> 0
4665 if (N0.isUndef() || N1.isUndef())
4666 return DAG.getConstant(0, DL, VT);
4667
4668 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4669 return V;
4670
4671 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4672 VT.getSizeInBits() <= 64) {
4673 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4674 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4675 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4676 // immediate for an add, but it is legal if its top c2 bits are set,
4677 // transform the ADD so the immediate doesn't need to be materialized
4678 // in a register.
4679 APInt ADDC = ADDI->getAPIntValue();
4680 APInt SRLC = SRLI->getAPIntValue();
4681 if (ADDC.getMinSignedBits() <= 64 &&
4682 SRLC.ult(VT.getSizeInBits()) &&
4683 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4685 SRLC.getZExtValue());
4686 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4687 ADDC |= Mask;
4688 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4689 SDLoc DL0(N0);
4690 SDValue NewAdd =
4691 DAG.getNode(ISD::ADD, DL0, VT,
4692 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4693 CombineTo(N0.getNode(), NewAdd);
4694 // Return N so it doesn't get rechecked!
4695 return SDValue(N, 0);
4696 }
4697 }
4698 }
4699 }
4700 }
4701 }
4702
4703 // Reduce bit extract of low half of an integer to the narrower type.
4704 // (and (srl i64:x, K), KMask) ->
4705 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4706 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4707 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4708 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4709 unsigned Size = VT.getSizeInBits();
4710 const APInt &AndMask = CAnd->getAPIntValue();
4711 unsigned ShiftBits = CShift->getZExtValue();
4712
4713 // Bail out, this node will probably disappear anyway.
4714 if (ShiftBits == 0)
4715 return SDValue();
4716
4717 unsigned MaskBits = AndMask.countTrailingOnes();
4718 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4719
4720 if (AndMask.isMask() &&
4721 // Required bits must not span the two halves of the integer and
4722 // must fit in the half size type.
4723 (ShiftBits + MaskBits <= Size / 2) &&
4724 TLI.isNarrowingProfitable(VT, HalfVT) &&
4725 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4726 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4727 TLI.isTruncateFree(VT, HalfVT) &&
4728 TLI.isZExtFree(HalfVT, VT)) {
4729 // The isNarrowingProfitable is to avoid regressions on PPC and
4730 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4731 // on downstream users of this. Those patterns could probably be
4732 // extended to handle extensions mixed in.
4733
4734 SDValue SL(N0);
4735 assert(MaskBits <= Size);
4736
4737 // Extracting the highest bit of the low half.
4738 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4739 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4740 N0.getOperand(0));
4741
4742 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4743 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4744 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4745 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4746 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4747 }
4748 }
4749 }
4750 }
4751
4752 return SDValue();
4753}
4754
4755bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4756 EVT LoadResultTy, EVT &ExtVT) {
4757 if (!AndC->getAPIntValue().isMask())
4758 return false;
4759
4760 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4761
4762 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4763 EVT LoadedVT = LoadN->getMemoryVT();
4764
4765 if (ExtVT == LoadedVT &&
4766 (!LegalOperations ||
4767 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4768 // ZEXTLOAD will match without needing to change the size of the value being
4769 // loaded.
4770 return true;
4771 }
4772
4773 // Do not change the width of a volatile load.
4774 if (LoadN->isVolatile())
4775 return false;
4776
4777 // Do not generate loads of non-round integer types since these can
4778 // be expensive (and would be wrong if the type is not byte sized).
4779 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4780 return false;
4781
4782 if (LegalOperations &&
4783 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4784 return false;
4785
4786 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4787 return false;
4788
4789 return true;
4790}
4791
4792bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4793 ISD::LoadExtType ExtType, EVT &MemVT,
4794 unsigned ShAmt) {
4795 if (!LDST)
4796 return false;
4797 // Only allow byte offsets.
4798 if (ShAmt % 8)
4799 return false;
4800
4801 // Do not generate loads of non-round integer types since these can
4802 // be expensive (and would be wrong if the type is not byte sized).
4803 if (!MemVT.isRound())
4804 return false;
4805
4806 // Don't change the width of a volatile load.
4807 if (LDST->isVolatile())
4808 return false;
4809
4810 // Verify that we are actually reducing a load width here.
4811 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4812 return false;
4813
4814 // Ensure that this isn't going to produce an unsupported unaligned access.
4815 if (ShAmt &&
4816 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4817 LDST->getAddressSpace(), ShAmt / 8,
4818 LDST->getMemOperand()->getFlags()))
4819 return false;
4820
4821 // It's not possible to generate a constant of extended or untyped type.
4822 EVT PtrType = LDST->getBasePtr().getValueType();
4823 if (PtrType == MVT::Untyped || PtrType.isExtended())
4824 return false;
4825
4826 if (isa<LoadSDNode>(LDST)) {
4827 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4828 // Don't transform one with multiple uses, this would require adding a new
4829 // load.
4830 if (!SDValue(Load, 0).hasOneUse())
4831 return false;
4832
4833 if (LegalOperations &&
4834 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4835 return false;
4836
4837 // For the transform to be legal, the load must produce only two values
4838 // (the value loaded and the chain). Don't transform a pre-increment
4839 // load, for example, which produces an extra value. Otherwise the
4840 // transformation is not equivalent, and the downstream logic to replace
4841 // uses gets things wrong.
4842 if (Load->getNumValues() > 2)
4843 return false;
4844
4845 // If the load that we're shrinking is an extload and we're not just
4846 // discarding the extension we can't simply shrink the load. Bail.
4847 // TODO: It would be possible to merge the extensions in some cases.
4848 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4849 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4850 return false;
4851
4852 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4853 return false;
4854 } else {
4855 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4856 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4857 // Can't write outside the original store
4858 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4859 return false;
4860
4861 if (LegalOperations &&
4862 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4863 return false;
4864 }
4865 return true;
4866}
4867
4868bool DAGCombiner::SearchForAndLoads(SDNode *N,
4870 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4871 ConstantSDNode *Mask,
4872 SDNode *&NodeToMask) {
4873 // Recursively search for the operands, looking for loads which can be
4874 // narrowed.
4875 for (SDValue Op : N->op_values()) {
4876 if (Op.getValueType().isVector())
4877 return false;
4878
4879 // Some constants may need fixing up later if they are too large.
4880 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4881 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4882 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4883 NodesWithConsts.insert(N);
4884 continue;
4885 }
4886
4887 if (!Op.hasOneUse())
4888 return false;
4889
4890 switch(Op.getOpcode()) {
4891 case ISD::LOAD: {
4892 auto *Load = cast<LoadSDNode>(Op);
4893 EVT ExtVT;
4894 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4895 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4896
4897 // ZEXTLOAD is already small enough.
4898 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4899 ExtVT.bitsGE(Load->getMemoryVT()))
4900 continue;
4901
4902 // Use LE to convert equal sized loads to zext.
4903 if (ExtVT.bitsLE(Load->getMemoryVT()))
4904 Loads.push_back(Load);
4905
4906 continue;
4907 }
4908 return false;
4909 }
4910 case ISD::ZERO_EXTEND:
4911 case ISD::AssertZext: {
4912 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4913 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4914 EVT VT = Op.getOpcode() == ISD::AssertZext ?
4915 cast<VTSDNode>(Op.getOperand(1))->getVT() :
4916 Op.getOperand(0).getValueType();
4917
4918 // We can accept extending nodes if the mask is wider or an equal
4919 // width to the original type.
4920 if (ExtVT.bitsGE(VT))
4921 continue;
4922 break;
4923 }
4924 case ISD::OR:
4925 case ISD::XOR:
4926 case ISD::AND:
4927 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4928 NodeToMask))
4929 return false;
4930 continue;
4931 }
4932
4933 // Allow one node which will masked along with any loads found.
4934 if (NodeToMask)
4935 return false;
4936
4937 // Also ensure that the node to be masked only produces one data result.
4938 NodeToMask = Op.getNode();
4939 if (NodeToMask->getNumValues() > 1) {
4940 bool HasValue = false;
4941 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4942 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4943 if (VT != MVT::Glue && VT != MVT::Other) {
4944 if (HasValue) {
4945 NodeToMask = nullptr;
4946 return false;
4947 }
4948 HasValue = true;
4949 }
4950 }
4951 assert(HasValue && "Node to be masked has no data result?");
4952 }
4953 }
4954 return true;
4955}
4956
4957bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4958 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4959 if (!Mask)
4960 return false;
4961
4962 if (!Mask->getAPIntValue().isMask())
4963 return false;
4964
4965 // No need to do anything if the and directly uses a load.
4966 if (isa<LoadSDNode>(N->getOperand(0)))
4967 return false;
4968
4970 SmallPtrSet<SDNode*, 2> NodesWithConsts;
4971 SDNode *FixupNode = nullptr;
4972 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4973 if (Loads.size() == 0)
4974 return false;
4975
4976 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4977 SDValue MaskOp = N->getOperand(1);
4978
4979 // If it exists, fixup the single node we allow in the tree that needs
4980 // masking.
4981 if (FixupNode) {
4982 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4983 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4984 FixupNode->getValueType(0),
4985 SDValue(FixupNode, 0), MaskOp);
4986 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4987 if (And.getOpcode() == ISD ::AND)
4988 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4989 }
4990
4991 // Narrow any constants that need it.
4992 for (auto *LogicN : NodesWithConsts) {
4993 SDValue Op0 = LogicN->getOperand(0);
4994 SDValue Op1 = LogicN->getOperand(1);
4995
4996 if (isa<ConstantSDNode>(Op0))
4997 std::swap(Op0, Op1);
4998
4999 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5000 Op1, MaskOp);
5001
5002 DAG.UpdateNodeOperands(LogicN, Op0, And);
5003 }
5004
5005 // Create narrow loads.
5006 for (auto *Load : Loads) {
5007 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5008 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5009 SDValue(Load, 0), MaskOp);
5010 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5011 if (And.getOpcode() == ISD ::AND)
5012 And = SDValue(
5013 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5014 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5015 assert(NewLoad &&
5016 "Shouldn't be masking the load if it can't be narrowed");
5017 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5018 }
5019 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5020 return true;
5021 }
5022 return false;
5023}
5024
5025// Unfold
5026// x & (-1 'logical shift' y)
5027// To
5028// (x 'opposite logical shift' y) 'logical shift' y
5029// if it is better for performance.
5030SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5031 assert(N->getOpcode() == ISD::AND);
5032
5033 SDValue N0 = N->getOperand(0);
5034 SDValue N1 = N->getOperand(1);
5035
5036 // Do we actually prefer shifts over mask?
5038 return SDValue();
5039
5040 // Try to match (-1 '[outer] logical shift' y)
5041 unsigned OuterShift;
5042 unsigned InnerShift; // The opposite direction to the OuterShift.
5043 SDValue Y; // Shift amount.
5044 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5045 if (!M.hasOneUse())
5046 return false;
5047 OuterShift = M->getOpcode();
5048 if (OuterShift == ISD::SHL)
5049 InnerShift = ISD::SRL;
5050 else if (OuterShift == ISD::SRL)
5051 InnerShift = ISD::SHL;
5052 else
5053 return false;
5054 if (!isAllOnesConstant(M->getOperand(0)))
5055 return false;
5056 Y = M->getOperand(1);
5057 return true;
5058 };
5059
5060 SDValue X;
5061 if (matchMask(N1))
5062 X = N0;
5063 else if (matchMask(N0))
5064 X = N1;
5065 else
5066 return SDValue();
5067
5068 SDLoc DL(N);
5069 EVT VT = N->getValueType(0);
5070
5071 // tmp = x 'opposite logical shift' y
5072 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5073 // ret = tmp 'logical shift' y
5074 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5075
5076 return T1;
5077}
5078
5079SDValue DAGCombiner::visitAND(SDNode *N) {
5080 SDValue N0 = N->getOperand(0);
5081 SDValue N1 = N->getOperand(1);
5082 EVT VT = N1.getValueType();
5083
5084 // x & x --> x
5085 if (N0 == N1)
5086 return N0;
5087
5088 // fold vector ops
5089 if (VT.isVector()) {
5090 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5091 return FoldedVOp;
5092
5093 // fold (and x, 0) -> 0, vector edition
5095 // do not return N0, because undef node may exist in N0
5097 SDLoc(N), N0.getValueType());
5099 // do not return N1, because undef node may exist in N1
5101 SDLoc(N), N1.getValueType());
5102
5103 // fold (and x, -1) -> x, vector edition
5105 return N1;
5107 return N0;
5108 }
5109
5110 // fold (and c1, c2) -> c1&c2
5113 if (N0C && N1C && !N1C->isOpaque())
5114 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
5115 // canonicalize constant to RHS
5118 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5119 // fold (and x, -1) -> x
5120 if (isAllOnesConstant(N1))
5121 return N0;
5122 // if (and x, c) is known to be zero, return 0
5123 unsigned BitWidth = VT.getScalarSizeInBits();
5124 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5125 APInt::getAllOnesValue(BitWidth)))
5126 return DAG.getConstant(0, SDLoc(N), VT);
5127
5128 if (SDValue NewSel = foldBinOpIntoSelect(N))
5129 return NewSel;
5130
5131 // reassociate and
5132 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5133 return RAND;
5134
5135 // Try to convert a constant mask AND into a shuffle clear mask.
5136 if (VT.isVector())
5137 if (SDValue Shuffle = XformToShuffleWithZero(N))
5138 return Shuffle;
5139
5140 // fold (and (or x, C), D) -> D if (C & D) == D
5141 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5142 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5143 };
5144 if (N0.getOpcode() == ISD::OR &&
5145 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5146 return N1;
5147 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5148 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5149 SDValue N0Op0 = N0.getOperand(0);
5150 APInt Mask = ~N1C->getAPIntValue();
5151 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5152 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5153 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5154 N0.getValueType(), N0Op0);
5155
5156 // Replace uses of the AND with uses of the Zero extend node.
5157 CombineTo(N, Zext);
5158
5159 // We actually want to replace all uses of the any_extend with the
5160 // zero_extend, to avoid duplicating things. This will later cause this
5161 // AND to be folded.
5162 CombineTo(N0.getNode(), Zext);
5163 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5164 }
5165 }
5166 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5167 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5168 // already be zero by virtue of the width of the base type of the load.
5169 //
5170 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5171 // more cases.
5172 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5174 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5175 N0.getOperand(0).getResNo() == 0) ||
5176 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5177 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5178 N0 : N0.getOperand(0) );
5179
5180 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5181 // This can be a pure constant or a vector splat, in which case we treat the
5182 // vector as a scalar and use the splat value.
5184 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5185 Constant = C->getAPIntValue();
5186 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5187 APInt SplatValue, SplatUndef;
5188 unsigned SplatBitSize;
5189 bool HasAnyUndefs;
5190 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5191 SplatBitSize, HasAnyUndefs);
5192 if (IsSplat) {
5193 // Undef bits can contribute to a possible optimisation if set, so
5194 // set them.
5195 SplatValue |= SplatUndef;
5196
5197 // The splat value may be something like "0x00FFFFFF", which means 0 for
5198 // the first vector value and FF for the rest, repeating. We need a mask
5199 // that will apply equally to all members of the vector, so AND all the
5200 // lanes of the constant together.
5201 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5202
5203 // If the splat value has been compressed to a bitlength lower
5204 // than the size of the vector lane, we need to re-expand it to
5205 // the lane size.
5206 if (EltBitWidth > SplatBitSize)
5207 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5208 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5209 SplatValue |= SplatValue.shl(SplatBitSize);
5210
5211 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5212 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5213 if ((SplatBitSize % EltBitWidth) == 0) {
5214 Constant = APInt::getAllOnesValue(EltBitWidth);
5215 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5216 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5217 }
5218 }
5219 }
5220
5221 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5222 // actually legal and isn't going to get expanded, else this is a false
5223 // optimisation.
5224 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5225 Load->getValueType(0),
5226 Load->getMemoryVT());
5227
5228 // Resize the constant to the same size as the original memory access before
5229 // extension. If it is still the AllOnesValue then this AND is completely
5230 // unneeded.
5231 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5232
5233 bool B;
5234 switch (Load->getExtensionType()) {
5235 default: B = false; break;
5236 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5237 case ISD::ZEXTLOAD:
5238 case ISD::NON_EXTLOAD: B = true; break;
5239 }
5240
5241 if (B && Constant.isAllOnesValue()) {
5242 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5243 // preserve semantics once we get rid of the AND.
5244 SDValue NewLoad(Load, 0);
5245
5246 // Fold the AND away. NewLoad may get replaced immediately.
5247 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5248
5249 if (Load->getExtensionType() == ISD::EXTLOAD) {
5250 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5251 Load->getValueType(0), SDLoc(Load),
5252 Load->getChain(), Load->getBasePtr(),
5253 Load->getOffset(), Load->getMemoryVT(),
5254 Load->getMemOperand());
5255 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5256 if (Load->getNumValues() == 3) {
5257 // PRE/POST_INC loads have 3 values.
5258 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5259 NewLoad.getValue(2) };
5260 CombineTo(Load, To, 3, true);
5261 } else {
5262 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5263 }
5264 }
5265
5266 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5267 }
5268 }
5269
5270 // fold (and (load x), 255) -> (zextload x, i8)
5271 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5272 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5273 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5274 (N0.getOpcode() == ISD::ANY_EXTEND &&
5275 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5276 if (SDValue Res = ReduceLoadWidth(N)) {
5277 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5278 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5279 AddToWorklist(N);
5280 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5281 return SDValue(N, 0);
5282 }
5283 }
5284
5285 if (Level >= AfterLegalizeTypes) {
5286 // Attempt to propagate the AND back up to the leaves which, if they're
5287 // loads, can be combined to narrow loads and the AND node can be removed.
5288 // Perform after legalization so that extend nodes will already be
5289 // combined into the loads.
5290 if (BackwardsPropagateMask(N, DAG)) {
5291 return SDValue(N, 0);
5292 }
5293 }
5294
5295 if (SDValue Combined = visitANDLike(N0, N1, N))
5296 return Combined;
5297
5298 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5299 if (N0.getOpcode() == N1.getOpcode())
5300 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5301 return V;
5302
5303 // Masking the negated extension of a boolean is just the zero-extended
5304 // boolean:
5305 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5306 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5307 //
5308 // Note: the SimplifyDemandedBits fold below can make an information-losing
5309 // transform, and then we have no way to find this better fold.
5310 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5311 if (isNullOrNullSplat(N0.getOperand(0))) {
5312 SDValue SubRHS = N0.getOperand(1);
5313 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5314 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5315 return SubRHS;
5316 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5317 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5318 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5319 }
5320 }
5321
5322 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5323 // fold (and (sra)) -> (and (srl)) when possible.
5325 return SDValue(N, 0);
5326
5327 // fold (zext_inreg (extload x)) -> (zextload x)
5328 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5329 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5330 (ISD::isEXTLoad(N0.getNode()) ||
5331 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5332 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5333 EVT MemVT = LN0->getMemoryVT();
5334 // If we zero all the possible extended bits, then we can turn this into
5335 // a zextload if we are running before legalize or the operation is legal.
5336 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5337 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5338 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5339 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5340 ((!LegalOperations && !LN0->isVolatile()) ||
5341 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5342 SDValue ExtLoad =
5343 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5344 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5345 AddToWorklist(N);
5346 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5347 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5348 }
5349 }
5350
5351 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5352 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5353 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5354 N0.getOperand(1), false))
5355 return BSwap;
5356 }
5357
5358 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5359 return Shifts;
5360
5361 return SDValue();
5362}
5363
5364/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
// \p N supplies the result type and the debug location of the new nodes;
// \p N0 and \p N1 are the two values being OR'ed (either may be wrapped in a
// constant AND mask). Returns an empty SDValue when the pattern is not matched.
// When \p DemandHighBits is set and the type is wider than 16 bits, the match
// additionally requires proof that nothing above the low halfword survives
// (see the check just before the BSWAP is built).
5365SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5366 bool DemandHighBits) {
// This fold is only attempted once LegalOperations is set.
5367 if (!LegalOperations)
5368 return SDValue();
5369
5370 EVT VT = N->getValueType(0);
// Only i16, i32 and i64 results are handled.
5371 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5372 return SDValue();
// NOTE(review): listing line 5373 (the condition guarding the next return) is
// missing from this copy of the file -- confirm against the upstream source.
5374 return SDValue();
5375
5376 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5377 bool LookPassAnd0 = false;
5378 bool LookPassAnd1 = false;
// Canonicalize operand order: a masked SRL is moved to N1 and a masked SHL
// is moved to N0.
5379 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5380 std::swap(N0, N1);
5381 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5382 std::swap(N0, N1);
// Strip an outer mask from the SHL side; LookPassAnd0 records that a mask
// was seen on this side.
5383 if (N0.getOpcode() == ISD::AND) {
5384 if (!N0.getNode()->hasOneUse())
5385 return SDValue();
5386 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5387 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5388 // This is needed for X86.
5389 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5390 N01C->getZExtValue() != 0xFFFF))
5391 return SDValue();
5392 N0 = N0.getOperand(0);
5393 LookPassAnd0 = true;
5394 }
5395
// Strip an outer 0xFF mask from the SRL side; LookPassAnd1 records it.
5396 if (N1.getOpcode() == ISD::AND) {
5397 if (!N1.getNode()->hasOneUse())
5398 return SDValue();
5399 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5400 if (!N11C || N11C->getZExtValue() != 0xFF)
5401 return SDValue();
5402 N1 = N1.getOperand(0);
5403 LookPassAnd1 = true;
5404 }
5405
// From here on N0 must be the single-use SHL and N1 the single-use SRL,
// both shifting by the constant 8.
5406 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5407 std::swap(N0, N1);
5408 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5409 return SDValue();
5410 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5411 return SDValue();
5412
5413 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5414 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5415 if (!N01C || !N11C)
5416 return SDValue();
5417 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5418 return SDValue();
5419
5420 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
// An inner mask is only looked through when no outer mask was already
// stripped on that side.
5421 SDValue N00 = N0->getOperand(0);
5422 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5423 if (!N00.getNode()->hasOneUse())
5424 return SDValue();
5425 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5426 if (!N001C || N001C->getZExtValue() != 0xFF)
5427 return SDValue();
5428 N00 = N00.getOperand(0);
5429 LookPassAnd0 = true;
5430 }
5431
5432 SDValue N10 = N1->getOperand(0);
5433 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5434 if (!N10.getNode()->hasOneUse())
5435 return SDValue();
5436 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5437 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5438 // for X86.
5439 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5440 N101C->getZExtValue() != 0xFFFF))
5441 return SDValue();
5442 N10 = N10.getOperand(0);
5443 LookPassAnd1 = true;
5444 }
5445
// Both shifts must operate on the very same value.
5446 if (N00 != N10)
5447 return SDValue();
5448
5449 // Make sure everything beyond the low halfword gets set to zero since the SRL
5450 // 16 will clear the top bits.
5451 unsigned OpSizeInBits = VT.getSizeInBits();
5452 if (DemandHighBits && OpSizeInBits > 16) {
5453 // If the left-shift isn't masked out then the only way this is a bswap is
5454 // if all bits beyond the low 8 are 0. In that case the entire pattern
5455 // reduces to a left shift anyway: leave it for other parts of the combiner.
5456 if (!LookPassAnd0)
5457 return SDValue();
5458
5459 // However, if the right shift isn't masked out then it might be because
5460 // it's not needed. See if we can spot that too.
5461 if (!LookPassAnd1 &&
5462 !DAG.MaskedValueIsZero(
5463 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5464 return SDValue();
5465 }
5466
// Build the BSWAP of the common source; for types wider than 16 bits, follow
// it with a logical right shift by (OpSizeInBits - 16).
5467 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5468 if (OpSizeInBits > 16) {
5469 SDLoc DL(N);
5470 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5471 DAG.getConstant(OpSizeInBits - 16, DL,
5472 getShiftAmountTy(VT)));
5473 }
5474 return Res;
5475}
5476
5477/// Return true if the specified node is an element that makes up a 32-bit
5478/// packed halfword byteswap.
5479/// ((x & 0x000000ff) << 8) |
5480/// ((x & 0x0000ff00) >> 8) |
5481/// ((x & 0x00ff0000) << 8) |
5482/// ((x & 0xff000000) >> 8)
// On success the source node (operand 0 of N's first operand) is stored in
// Parts[i], where i is the byte index selected by the element's mask; an
// element whose slot is already occupied is rejected.
// NOTE(review): listing line 5483 (the function signature, presumably for
// isBSwapHWordElement as called from MatchBSwapHWord) is missing from this
// copy of the file -- confirm the parameter types against the upstream source.
// Only single-use nodes are considered.
5484 if (!N.getNode()->hasOneUse())
5485 return false;
5486
// Both N and its first operand must be an AND, SHL or SRL.
5487 unsigned Opc = N.getOpcode();
5488 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5489 return false;
5490
5491 SDValue N0 = N.getOperand(0);
5492 unsigned Opc0 = N0.getOpcode();
5493 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5494 return false;
5495
// Locate the constant mask: on N itself when N is the AND, otherwise on the
// AND feeding the shift.
5496 ConstantSDNode *N1C = nullptr;
5497 // SHL or SRL: look upstream for AND mask operand
5498 if (Opc == ISD::AND)
5499 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5500 else if (Opc0 == ISD::AND)
5501 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5502 if (!N1C)
5503 return false;
5504
// Translate the mask constant into the index of the byte it selects.
5505 unsigned MaskByteOffset;
5506 switch (N1C->getZExtValue()) {
5507 default:
5508 return false;
5509 case 0xFF: MaskByteOffset = 0; break;
5510 case 0xFF00: MaskByteOffset = 1; break;
5511 case 0xFFFF:
5512 // In case demanded bits didn't clear the bits that will be shifted out.
5513 // This is needed for X86.
5514 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5515 MaskByteOffset = 1;
5516 break;
5517 }
5518 return false;
5519 case 0xFF0000: MaskByteOffset = 2; break;
5520 case 0xFF000000: MaskByteOffset = 3; break;
5521 }
5522
// Check that the shift direction agrees with the selected byte and that the
// shift amount is the constant 8.
5523 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5524 if (Opc == ISD::AND) {
5525 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5526 // (x >> 8) & 0xff
5527 // (x >> 8) & 0xff0000
5528 if (Opc0 != ISD::SRL)
5529 return false;
5530 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5531 if (!C || C->getZExtValue() != 8)
5532 return false;
5533 } else {
5534 // (x << 8) & 0xff00
5535 // (x << 8) & 0xff000000
5536 if (Opc0 != ISD::SHL)
5537 return false;
5538 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5539 if (!C || C->getZExtValue() != 8)
5540 return false;
5541 }
5542 } else if (Opc == ISD::SHL) {
5543 // (x & 0xff) << 8
5544 // (x & 0xff0000) << 8
5545 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5546 return false;
5547 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5548 if (!C || C->getZExtValue() != 8)
5549 return false;
5550 } else { // Opc == ISD::SRL
5551 // (x & 0xff00) >> 8
5552 // (x & 0xff000000) >> 8
5553 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5554 return false;
5555 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5556 if (!C || C->getZExtValue() != 8)
5557 return false;
5558 }
5559
// Each byte position may be claimed only once.
5560 if (Parts[MaskByteOffset])
5561 return false;
5562
// Remember the node that supplies this byte.
5563 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5564 return true;
5565}
5566
5567/// Match a 32-bit packed halfword bswap. That is
5568/// ((x & 0x000000ff) << 8) |
5569/// ((x & 0x0000ff00) >> 8) |
5570/// ((x & 0x00ff0000) << 8) |
5571/// ((x & 0xff000000) >> 8)
5572/// => (rotl (bswap x), 16)
// \p N is the OR node being combined (it supplies the type and debug
// location); \p N0 and \p N1 are its operands. Returns an empty SDValue when
// the four byte elements cannot all be matched to the same source node.
5573SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
// This fold is only attempted once LegalOperations is set.
5574 if (!LegalOperations)
5575 return SDValue();
5576
// Only i32 is handled.
5577 EVT VT = N->getValueType(0);
5578 if (VT != MVT::i32)
5579 return SDValue();
// NOTE(review): listing line 5580 (the condition guarding the next return) is
// missing from this copy of the file -- confirm against the upstream source.
5581 return SDValue();
5582
5583 // Look for either
5584 // (or (or (and), (and)), (or (and), (and)))
5585 // (or (or (or (and), (and)), (and)), (and))
5586 if (N0.getOpcode() != ISD::OR)
5587 return SDValue();
5588 SDValue N00 = N0.getOperand(0);
5589 SDValue N01 = N0.getOperand(1);
// One slot per byte of the 32-bit value; filled in by isBSwapHWordElement.
5590 SDNode *Parts[4] = {};
5591
5592 if (N1.getOpcode() == ISD::OR &&
5593 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5594 // (or (or (and), (and)), (or (and), (and)))
5595 if (!isBSwapHWordElement(N00, Parts))
5596 return SDValue();
5597
5598 if (!isBSwapHWordElement(N01, Parts))
5599 return SDValue();
5600 SDValue N10 = N1.getOperand(0);
5601 if (!isBSwapHWordElement(N10, Parts))
5602 return SDValue();
5603 SDValue N11 = N1.getOperand(1);
5604 if (!isBSwapHWordElement(N11, Parts))
5605 return SDValue();
5606 } else {
5607 // (or (or (or (and), (and)), (and)), (and))
5608 if (!isBSwapHWordElement(N1, Parts))
5609 return SDValue();
5610 if (!isBSwapHWordElement(N01, Parts))
5611 return SDValue();
5612 if (N00.getOpcode() != ISD::OR)
5613 return SDValue();
5614 SDValue N000 = N00.getOperand(0);
5615 if (!isBSwapHWordElement(N000, Parts))
5616 return SDValue();
5617 SDValue N001 = N00.getOperand(1);
5618 if (!isBSwapHWordElement(N001, Parts))
5619 return SDValue();
5620 }
5621
5622 // Make sure the parts are all coming from the same node.
5623 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5624 return SDValue();
5625
5626 SDLoc DL(N);
5627 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5628 SDValue(Parts[0], 0));
5629
5630 // Result of the bswap should be rotated by 16. If it's not legal, then
5631 // do (x << 16) | (x >> 16).
5632 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
// NOTE(review): listing lines 5633 and 5635 (the conditions selecting the
// ROTL and ROTR returns below) are missing from this copy of the file; as
// shown, only the last return is the unconditional fallback.
5634 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5636 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5637 return DAG.getNode(ISD::OR, DL, VT,
5638 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5639 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5640}
5641
5642/// This contains all DAGCombine rules which reduce two values combined by
5643/// an Or operation to a single value \see visitANDLike().
// \p N0 and \p N1 are the two values being OR'ed and \p N supplies the debug
// location. Returns the simplified value, or an empty SDValue if no rule
// applied.
5644SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5645 EVT VT = N1.getValueType();
5646 SDLoc DL(N);
5647
5648 // fold (or x, undef) -> -1
5649 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5650 return DAG.getAllOnesConstant(DL, VT);
5651
// Try the combined setcc folds; the leading 'false' presumably selects the
// OR flavour of the helper -- TODO confirm against foldLogicOfSetCCs.
5652 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5653 return V;
5654
5655 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5656 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5657 // Don't increase # computations.
5658 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5659 // We can only do this xform if we know that bits from X that are set in C2
5660 // but not in C1 are already zero. Likewise for Y.
// NOTE(review): listing lines 5662 and 5664 (the initializers of N0O1C and
// N1O1C and the opening braces of these two ifs) are missing from this copy
// of the file -- confirm against the upstream source.
5661 if (const ConstantSDNode *N0O1C =
5663 if (const ConstantSDNode *N1O1C =
5665 // We can only do this xform if we know that bits from X that are set in
5666 // C2 but not in C1 are already zero. Likewise for Y.
5667 const APInt &LHSMask = N0O1C->getAPIntValue();
5668 const APInt &RHSMask = N1O1C->getAPIntValue();
5669
5670 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5671 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
// C3 is the union of the two masks.
5672 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5673 N0.getOperand(0), N1.getOperand(0));
5674 return DAG.getNode(ISD::AND, DL, VT, X,
5675 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5676 }
5677 }
5678 }
5679 }
5680
5681 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5682 if (N0.getOpcode() == ISD::AND &&
5683 N1.getOpcode() == ISD::AND &&
5684 N0.getOperand(0) == N1.getOperand(0) &&
5685 // Don't increase # computations.
5686 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5687 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5688 N0.getOperand(1), N1.getOperand(1));
5689 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5690 }
5691
5692 return SDValue();
5693}
5694
5695/// OR combines for which the commuted variant will be tried as well.
// Matches only on \p N0 being an AND with a bitwise-not of \p N1 as one of
// its operands; the caller (visitOR) invokes it a second time with N0 and N1
// swapped. \p N supplies the debug location. Returns an empty SDValue when
// nothing matched.
// NOTE(review): listing line 5696 (the first line of the signature, including
// the return type and function name) is missing from this copy of the file.
5697 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5698 EVT VT = N0.getValueType();
5699 if (N0.getOpcode() == ISD::AND) {
5700 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5701 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5702 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5703
5704 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5705 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5706 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5707 }
5708
5709 return SDValue();
5710}
5711
// Combine entry point for an OR node \p N. Tries a sequence of folds in order
// and returns the first replacement found; returns SDValue(N, 0) when N was
// updated in place and an empty SDValue when nothing applied.
// NOTE(review): this copy of the file omits listing lines 5727, 5729, 5733,
// 5736, 5797, 5809, 5814-5815 and 5877. The conditions guarding several of the
// returns below, the definition of N0C, and one statement in the shuffle-mask
// retry are therefore not visible here -- confirm against the upstream source.
5712SDValue DAGCombiner::visitOR(SDNode *N) {
5713 SDValue N0 = N->getOperand(0);
5714 SDValue N1 = N->getOperand(1);
5715 EVT VT = N1.getValueType();
5716
5717 // x | x --> x
5718 if (N0 == N1)
5719 return N0;
5720
5721 // fold vector ops
5722 if (VT.isVector()) {
5723 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5724 return FoldedVOp;
5725
5726 // fold (or x, 0) -> x, vector edition
// (guarding conditions on listing lines 5727 and 5729 are not shown)
5728 return N1;
5730 return N0;
5731
5732 // fold (or x, -1) -> -1, vector edition
// (guarding conditions on listing lines 5733 and 5736 are not shown)
5734 // do not return N0, because undef node may exist in N0
5735 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5737 // do not return N1, because undef node may exist in N1
5738 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5739
5740 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5741 // Do this only if the resulting shuffle is legal.
5742 if (isa<ShuffleVectorSDNode>(N0) &&
5743 isa<ShuffleVectorSDNode>(N1) &&
5744 // Avoid folding a node with illegal type.
5745 TLI.isTypeLegal(VT)) {
5746 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5747 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5748 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5749 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5750 // Ensure both shuffles have a zero input.
5751 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5752 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5753 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5754 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5755 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5756 bool CanFold = true;
5757 int NumElts = VT.getVectorNumElements();
5758 SmallVector<int, 4> Mask(NumElts);
5759
// Build the merged mask lane by lane; a negative mask element is treated as
// undef.
5760 for (int i = 0; i != NumElts; ++i) {
5761 int M0 = SV0->getMaskElt(i);
5762 int M1 = SV1->getMaskElt(i);
5763
5764 // Determine if either index is pointing to a zero vector.
5765 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5766 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5767
5768 // If one element is zero and the other side is undef, keep undef.
5769 // This also handles the case that both are undef.
5770 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5771 Mask[i] = -1;
5772 continue;
5773 }
5774
5775 // Make sure only one of the elements is zero.
5776 if (M0Zero == M1Zero) {
5777 CanFold = false;
5778 break;
5779 }
5780
5781 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5782
5783 // We have a zero and non-zero element. If the non-zero came from
5784 // SV0 make the index a LHS index. If it came from SV1, make it
5785 // a RHS index. We need to mod by NumElts because we don't care
5786 // which operand it came from in the original shuffles.
5787 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5788 }
5789
5790 if (CanFold) {
// Pick the non-zero input of each original shuffle.
5791 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5792 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5793
// If the mask is not legal as built, retry with the operands swapped.
// (listing line 5797, between the swap and the re-check, is not shown)
5794 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5795 if (!LegalMask) {
5796 std::swap(NewLHS, NewRHS);
5798 LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5799 }
5800
5801 if (LegalMask)
5802 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5803 }
5804 }
5805 }
5806 }
5807
5808 // fold (or c1, c2) -> c1|c2
// (listing line 5809, which defines N0C, is not shown)
5810 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5811 if (N0C && N1C && !N1C->isOpaque())
5812 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5813 // canonicalize constant to RHS
// (guarding condition on listing lines 5814-5815 is not shown)
5816 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5817 // fold (or x, 0) -> x
5818 if (isNullConstant(N1))
5819 return N0;
5820 // fold (or x, -1) -> -1
5821 if (isAllOnesConstant(N1))
5822 return N1;
5823
5824 if (SDValue NewSel = foldBinOpIntoSelect(N))
5825 return NewSel;
5826
5827 // fold (or x, c) -> c iff (x & ~c) == 0
5828 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5829 return N1;
5830
5831 if (SDValue Combined = visitORLike(N0, N1, N))
5832 return Combined;
5833
5834 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5835 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5836 return BSwap;
5837 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5838 return BSwap;
5839
5840 // reassociate or
5841 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5842 return ROR;
5843
5844 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5845 // iff (c1 & c2) != 0 or c1/c2 are undef.
5846 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5847 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5848 };
5849 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5850 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
// COR is the constant-folded c1|c2; the rewrite is skipped if folding fails.
5851 if (SDValue COR = DAG.FoldConstantArithmetic(
5852 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5853 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5854 AddToWorklist(IOR.getNode());
5855 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5856 }
5857 }
5858
// Try the operand-order-sensitive folds in both operand orders.
5859 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5860 return Combined;
5861 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5862 return Combined;
5863
5864 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5865 if (N0.getOpcode() == N1.getOpcode())
5866 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5867 return V;
5868
5869 // See if this is some rotate idiom.
5870 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5871 return SDValue(Rot, 0);
5872
5873 if (SDValue Load = MatchLoadCombine(N))
5874 return Load;
5875
5876 // Simplify the operands using demanded-bits information.
// (guarding condition on listing line 5877 is not shown)
5878 return SDValue(N, 0);
5879
5880 // If OR can be rewritten into ADD, try combines based on ADD.
5881 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5882 DAG.haveNoCommonBitsSet(N0, N1))
5883 if (SDValue Combined = visitADDLike(N))
5884 return Combined;
5885
5886 return SDValue();
5887}
5888
// If Op is an AND whose second operand is a constant integer (or a build
// vector of constant integers), store that operand in Mask and return the
// AND's first operand; otherwise return Op unchanged and leave Mask untouched.
// NOTE(review): listing line 5889 (the function signature, presumably for
// stripConstantMask as called from matchRotateHalf and extractShiftForRotate)
// is missing from this copy of the file -- confirm against the upstream source.
5890 if (Op.getOpcode() == ISD::AND &&
5891 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5892 Mask = Op.getOperand(1);
5893 return Op.getOperand(0);
5894 }
5895 return Op;
5896}
5897
5898/// Match "(X shl/srl V1) & V2" where V2 may not be present.
// On a match, \p Shift receives the SHL/SRL node and the function returns
// true. \p Mask is written by stripConstantMask whenever an outer constant
// AND was stripped, even if the function then returns false because the
// remaining value is not a shift.
5899static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5900 SDValue &Mask) {
// Look through an optional constant AND mask first.
5901 Op = stripConstantMask(DAG, Op, Mask);
5902 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5903 Shift = Op;
5904 return true;
5905 }
5906 return false;
5907}
5908
5909/// Helper function for visitOR to extract the needed side of a rotate idiom
5910/// from a shl/srl/mul/udiv. This is meant to handle cases where
5911/// InstCombine merged some outside op with one of the shifts from
5912/// the rotate pattern.
5913/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5914/// Otherwise, returns an expansion of \p ExtractFrom based on the following
5915/// patterns:
5916///
5917/// (or (mul v c0) (shrl (mul v c1) c2)):
5918/// expands (mul v c0) -> (shl (mul v c1) c3)
5919///
5920/// (or (udiv v c0) (shl (udiv v c1) c2)):
5921/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
5922///
5923/// (or (shl v c0) (shrl (shl v c1) c2)):
5924/// expands (shl v c0) -> (shl (shl v c1) c3)
5925///
5926/// (or (shrl v c0) (shl (shrl v c1) c2)):
5927/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
5928///
5929/// Such that in all cases, c3+c2==bitwidth(op v c1).
// \p OppShift is the already-matched shift on the other side of the OR;
// \p Mask receives any constant AND mask stripped from \p ExtractFrom.
// NOTE(review): listing line 5930 (the first line of the signature, including
// the return type, function name and leading parameters) is missing from this
// copy of the file -- confirm against the upstream source.
5931 SDValue ExtractFrom, SDValue &Mask,
5932 const SDLoc &DL) {
5933 assert(OppShift && ExtractFrom && "Empty SDValue");
5934 assert(
5935 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5936 "Existing shift must be valid as a rotate half");
5937
5938 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5939 // Preconditions:
5940 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5941 //
5942 // Find opcode of the needed shift to be extracted from (op0 v c0).
5943 unsigned Opcode = ISD::DELETED_NODE;
5944 bool IsMulOrDiv = false;
5945 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5946 // opcode or its arithmetic (mul or udiv) variant.
5947 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5948 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5949 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5950 return false;
5951 Opcode = NeededShift;
5952 return true;
5953 };
5954 // op0 must be either the needed shift opcode or the mul/udiv equivalent
5955 // that the needed shift can be extracted from.
// An existing SRL needs a SHL (or MUL) partner; an existing SHL needs an SRL
// (or UDIV) partner.
5956 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5957 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5958 return SDValue();
5959
5960 // op0 must be the same opcode on both sides, have the same LHS argument,
5961 // and produce the same value type.
5962 SDValue OppShiftLHS = OppShift.getOperand(0);
5963 EVT ShiftedVT = OppShiftLHS.getValueType();
5964 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5965 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5966 ShiftedVT != ExtractFrom.getValueType())
5967 return SDValue();
5968
5969 // Amount of the existing shift.
5970 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5971 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5972 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5973 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5974 ConstantSDNode *ExtractFromCst =
5975 isConstOrConstSplat(ExtractFrom.getOperand(1));
5976 // TODO: We should be able to handle non-uniform constant vectors for these values
5977 // Check that we have constant values.
// A zero constant is rejected as well as a missing one.
5978 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5979 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5980 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5981 return SDValue();
5982
5983 // Compute the shift amount we need to extract to complete the rotate.
5984 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
// Bail out if the existing shift amount exceeds the element width, so the
// subtraction below cannot wrap.
5985 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5986 return SDValue();
5987 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5988 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5989 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5990 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5991 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5992
5993 // Now try to extract the needed shift from the ExtractFrom op and see if the
5994 // result matches up with the existing shift's LHS op.
5995 if (IsMulOrDiv) {
5996 // Op to extract from is a mul or udiv by a constant.
5997 // Check:
5998 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5999 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6000 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6001 NeededShiftAmt.getZExtValue());
6002 APInt ResultAmt;
6003 APInt Rem;
6004 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6005 if (Rem != 0 || ResultAmt != OppLHSAmt)
6006 return SDValue();
6007 } else {
6008 // Op to extract from is a shift by a constant.
6009 // Check:
6010 // c2 - (bitwidth(op0 v c0) - c1) == c0
6011 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6012 ExtractFromAmt.getBitWidth()))
6013 return SDValue();
6014 }
6015
6016 // Return the expanded shift op that should allow a rotate to be formed.
6017 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6018 EVT ResVT = ExtractFrom.getValueType();
6019 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6020 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6021}
6022
6023// Return true if we can prove that, whenever Neg and Pos are both in the
6024// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6025// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6026//
6027// (or (shift1 X, Neg), (shift2 X, Pos))
6028//
6029// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6030// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6031// to consider shift amounts with defined behavior.
6032static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6033 SelectionDAG &DAG) {
6034 // If EltSize is a power of 2 then:
6035 //
6036 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6037 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6038 //
6039 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6040 // for the stronger condition:
6041 //
6042 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6043 //
6044 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6045 // we can just replace Neg with Neg' for the rest of the function.
6046 //
6047 // In other cases we check for the even stronger condition:
6048 //
6049 // Neg == EltSize - Pos [B]
6050 //
6051 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6052 // behavior if Pos == 0 (and consequently Neg == EltSize).
6053 //
6054 // We could actually use [A] whenever EltSize is a power of 2, but the
6055 // only extra cases that it would match are those uninteresting ones
6056 // where Neg and Pos are never in range at the same time. E.g. for
6057 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6058 // as well as (sub 32, Pos), but:
6059 //
6060 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6061 //
6062 // always invokes undefined behavior for 32-bit X.
6063 //
6064 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
// MaskLoBits stays 0 for condition [B] and is log2(EltSize) for condition [A].
6065 unsigned MaskLoBits = 0;
6066 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6067 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6068 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6069 unsigned Bits = Log2_64(EltSize);
// The AND constant must have no bits above the low 'Bits' bits, and every
// one of those low bits must be either set in the constant or known zero in
// the masked value.
6070 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6071 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6072 Neg = Neg.getOperand(0);
6073 MaskLoBits = Bits;
6074 }
6075 }
6076 }
6077
6078 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6079 if (Neg.getOpcode() != ISD::SUB)
6080 return false;
// NOTE(review): listing line 6081 (the definition of NegC used below) is
// missing from this copy of the file -- confirm against the upstream source.
6082 if (!NegC)
6083 return false;
6084 SDValue NegOp1 = Neg.getOperand(1);
6085
6086 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6087 // Pos'. The truncation is redundant for the purpose of the equality.
6088 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6089 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6090 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6091 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6092 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6093 MaskLoBits))
6094 Pos = Pos.getOperand(0);
6095 }
6096 }
6097
6098 // The condition we need is now:
6099 //
6100 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6101 //
6102 // If NegOp1 == Pos then we need:
6103 //
6104 // EltSize & Mask == NegC & Mask
6105 //
6106 // (because "x & Mask" is a truncation and distributes through subtraction).
6107 APInt Width;
6108 if (Pos == NegOp1)
6109 Width = NegC->getAPIntValue();
6110
6111 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6112 // Then the condition we want to prove becomes:
6113 //
6114 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6115 //
6116 // which, again because "x & Mask" is a truncation, becomes:
6117 //
6118 // NegC & Mask == (EltSize - PosC) & Mask
6119 // EltSize & Mask == (NegC + PosC) & Mask
6120 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6121 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6122 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6123 else
6124 return false;
6125 } else
6126 return false;
6127
6128 // Now we just need to check that EltSize & Mask == Width & Mask.
6129 if (MaskLoBits)
6130 // EltSize & Mask is 0 since Mask is EltSize - 1.
6131 return Width.getLoBits(MaskLoBits) == 0;
6132 return Width == EltSize;
6133}
6134
6135// A subroutine of MatchRotate used once we have found an OR of two opposite
6136// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6137// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6138// former being preferred if supported. InnerPos and InnerNeg are Pos and
6139// Neg with outer conversions stripped away.
// Returns the node of the newly built rotate, or nullptr when matchRotateSub
// cannot prove the required relationship between InnerPos and InnerNeg.
6140SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6141 SDValue Neg, SDValue InnerPos,
6142 SDValue InnerNeg, unsigned PosOpcode,
6143 unsigned NegOpcode, const SDLoc &DL) {
6144 // fold (or (shl x, (*ext y)),
6145 // (srl x, (*ext (sub 32, y)))) ->
6146 // (rotl x, y) or (rotr x, (sub 32, y))
6147 //
6148 // fold (or (shl x, (*ext (sub 32, y))),
6149 // (srl x, (*ext y))) ->
6150 // (rotr x, y) or (rotl x, (sub 32, y))
6151 EVT VT = Shifted.getValueType();
6152 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
// Use PosOpcode with Pos when the target reports it as legal or custom for
// VT; otherwise fall back to NegOpcode with Neg.
6153 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6154 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6155 HasPos ? Pos : Neg).getNode();
6156 }
6157
6158 return nullptr;
6159}
6160
6161// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6162// idioms for rotate, and if the target supports rotation instructions, generate
6163// a rot[lr].
6164SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6165 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6166 EVT VT = LHS.getValueType();
6167 if (!TLI.isTypeLegal(VT)) return nullptr;
6168
6169 // The target must have at least one rotate flavor.
6170 bool HasROTL = hasOperation(ISD::ROTL, VT);
6171 bool HasROTR = hasOperation(ISD::ROTR, VT);
6172 if (!HasROTL && !HasROTR) return nullptr;
6173
6174 // Check for truncated rotate.
6175 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6176 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6177 assert(LHS.getValueType() == RHS.getValueType());
6178 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6179 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
6180 SDValue(Rot, 0)).getNode();
6181 }
6182 }
6183
6184 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6185 SDValue LHSShift; // The shift.
6186 SDValue LHSMask; // AND value if any.
6187 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6188
6189 SDValue RHSShift; // The shift.
6190 SDValue RHSMask; // AND value if any.
6191 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6192
6193 // If neither side matched a rotate half, bail
6194 if (!LHSShift && !RHSShift)
6195 return nullptr;
6196
6197 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6198 // side of the rotate, so try to handle that here. In all cases we need to
6199 // pass the matched shift from the opposite side to compute the opcode and
6200 // needed shift amount to extract. We still want to do this if both sides
6201 // matched a rotate half because one half may be a potential overshift that
6202 // can be broken down (ie if InstCombine merged two shl or srl ops into a
6203 // single one).
6204
6205 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6206 if (LHSShift)
6207 if (SDValue NewRHSShift =
6208 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6209 RHSShift = NewRHSShift;
6210 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6211 if (RHSShift)
6212 if (SDValue NewLHSShift =
6213 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6214 LHSShift = NewLHSShift;
6215
6216 // If a side is still missing, nothing else we can do.
6217 if (!RHSShift || !LHSShift)
6218 return nullptr;
6219
6220 // At this point we've matched or extracted a shift op on each side.
6221
6222 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6223 return nullptr; // Not shifting the same value.
6224
6225 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6226 return nullptr; // Shifts must disagree.
6227
6228 // Canonicalize shl to left side in a shl/srl pair.
6229 if (RHSShift.getOpcode() == ISD::SHL) {
6230 std::swap(LHS, RHS);
6231 std::swap(LHSShift, RHSShift);
6232 std::swap(LHSMask, RHSMask);
6233 }
6234
6235 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6236 SDValue LHSShiftArg = LHSShift.getOperand(0);
6237 SDValue LHSShiftAmt = LHSShift.getOperand(1);
6238 SDValue RHSShiftArg = RHSShift.getOperand(0);
6239 SDValue RHSShiftAmt = RHSShift.getOperand(1);
6240
6241 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6242 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6243 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6244 ConstantSDNode *RHS) {
6245 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6246 };
6247 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6248 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6249 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6250
6251 // If there is an AND of either shifted operand, apply it to the result.
6252 if (LHSMask.getNode() || RHSMask.getNode()) {
6253 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6254 SDValue Mask = AllOnes;
6255
6256 if (LHSMask.getNode()) {
6257 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6258 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6259 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6260 }
6261 if (RHSMask.getNode()) {
6262 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6263 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6264 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6265 }
6266
6267 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6268 }
6269
6270 return Rot.getNode();
6271 }
6272
6273 // If there is a mask here, and we have a variable shift, we can't be sure
6274 // that we're masking out the right stuff.
6275 if (LHSMask.getNode() || RHSMask.getNode())
6276 return nullptr;
6277
6278 // If the shift amount is sign/zext/any-extended just peel it off.
6279 SDValue LExtOp0 = LHSShiftAmt;
6280 SDValue RExtOp0 = RHSShiftAmt;
6281 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6282 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6283 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6284 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6285 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6286 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6287 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6288 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6289 LExtOp0 = LHSShiftAmt.getOperand(0);
6290 RExtOp0 = RHSShiftAmt.getOperand(0);
6291 }
6292
6293 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6294 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6295 if (TryL)
6296 return TryL;
6297
6298 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6299 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6300 if (TryR)
6301 return TryR;
6302
6303 return nullptr;
6304}
6305
6306namespace {
6307
6308/// Represents known origin of an individual byte in load combine pattern. The
6309/// value of the byte is either constant zero or comes from memory.
6310struct ByteProvider {
6311 // For constant zero providers Load is set to nullptr. For memory providers
6312 // Load represents the node which loads the byte from memory.
6313 // ByteOffset is the offset of the byte in the value produced by the load.
6314 LoadSDNode *Load = nullptr;
6315 unsigned ByteOffset = 0;
6316
6317 ByteProvider() = default;
6318
6319 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6320 return ByteProvider(Load, ByteOffset);
6321 }
6322
6323 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6324
6325 bool isConstantZero() const { return !Load; }
6326 bool isMemory() const { return Load; }
6327
6328 bool operator==(const ByteProvider &Other) const {
6329 return Other.Load == Load && Other.ByteOffset == ByteOffset;
6330 }
6331
6332private:
6333 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6334 : Load(Load), ByteOffset(ByteOffset) {}
6335};
6336
6337} // end anonymous namespace
6338
6339/// Recursively traverses the expression calculating the origin of the requested
6340/// byte of the given value. Returns None if the provider can't be calculated.
6341///
6342/// For all the values except the root of the expression verifies that the value
6343/// has exactly one use and if it's not true return None. This way if the origin
6344/// of the byte is returned it's guaranteed that the values which contribute to
6345/// the byte are not used outside of this expression.
6346///
6347/// Because the parts of the expression are not allowed to have more than one
6348/// use this function iterates over trees, not DAGs. So it never visits the same
6349/// node more than once.
6350static const Optional<ByteProvider>
6351calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6352 bool Root = false) {
6353 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6354 if (Depth == 10)
6355 return None;
6356
6357 if (!Root && !Op.hasOneUse())
6358 return None;
6359
6360 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6361 unsigned BitWidth = Op.getValueSizeInBits();
6362 if (BitWidth % 8 != 0)
6363 return None;
6364 unsigned ByteWidth = BitWidth / 8;
6365 assert(Index < ByteWidth && "invalid index requested");
6366 (void) ByteWidth;
6367
6368 switch (Op.getOpcode()) {
6369 case ISD::OR: {
6370 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6371 if (!LHS)
6372 return None;
6373 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6374 if (!RHS)
6375 return None;
6376
6377 if (LHS->isConstantZero())
6378 return RHS;
6379 if (RHS->isConstantZero())
6380 return LHS;
6381 return None;
6382 }
6383 case ISD::SHL: {
6384 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6385 if (!ShiftOp)
6386 return None;
6387
6388 uint64_t BitShift = ShiftOp->getZExtValue();
6389 if (BitShift % 8 != 0)
6390 return None;
6391 uint64_t ByteShift = BitShift / 8;
6392
6393 return Index < ByteShift
6394 ? ByteProvider::getConstantZero()
6395 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6396 Depth + 1);
6397 }
6398 case ISD::ANY_EXTEND:
6399 case ISD::SIGN_EXTEND:
6400 case ISD::ZERO_EXTEND: {
6401 SDValue NarrowOp = Op->getOperand(0);
6402 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6403 if (NarrowBitWidth % 8 != 0)
6404 return None;
6405 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6406
6407 if (Index >= NarrowByteWidth)
6408 return Op.getOpcode() == ISD::ZERO_EXTEND
6409 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6410 : None;
6411 return calculateByteProvider(NarrowOp, Index, Depth + 1);
6412 }
6413 case ISD::BSWAP:
6414 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6415 Depth + 1);
6416 case ISD::LOAD: {
6417 auto L = cast<LoadSDNode>(Op.getNode());
6418 if (L->isVolatile() || L->isIndexed())
6419 return None;
6420
6421 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6422 if (NarrowBitWidth % 8 != 0)
6423 return None;
6424 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6425
6426 if (Index >= NarrowByteWidth)
6427 return L->getExtensionType() == ISD::ZEXTLOAD
6428 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6429 : None;
6430 return ByteProvider::getMemory(L, Index);
6431 }
6432 }
6433
6434 return None;
6435}
6436
/// Memory offset of byte \p i (0 = least significant) of a \p BW byte wide
/// value in little-endian layout. The offset equals \p i; \p BW is unused and
/// kept so the signature parallels BigEndianByteAt.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}
6440
/// Memory offset of byte \p i (0 = least significant) of a \p BW byte wide
/// value in big-endian layout, i.e. the index mirrored within the value.
/// Callers must pass i < BW; otherwise the unsigned subtraction wraps.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}
6444
6445// Check if the bytes offsets we are looking at match with either big or
6446// little endian value loaded. Return true for big endian, false for little
6447// endian, and None if match failed.
6449 int64_t FirstOffset) {
6450 // The endian can be decided only when it is 2 bytes at least.
6451 unsigned Width = ByteOffsets.size();
6452 if (Width < 2)
6453 return None;
6454
6455 bool BigEndian = true, LittleEndian = true;
6456 for (unsigned i = 0; i < Width; i++) {
6457 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6458 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6459 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6460 if (!BigEndian && !LittleEndian)
6461 return None;
6462 }
6463
6464 assert((BigEndian != LittleEndian) && "It should be either big endian or"
6465 "little endian");
6466 return BigEndian;
6467}
6468
6470 switch (Value.getOpcode()) {
6471 case ISD::TRUNCATE:
6472 case ISD::ZERO_EXTEND:
6473 case ISD::SIGN_EXTEND:
6474 case ISD::ANY_EXTEND:
6475 return stripTruncAndExt(Value.getOperand(0));
6476 }
6477 return Value;
6478}
6479
6480/// Match a pattern where a wide type scalar value is stored by several narrow
6481/// stores. Fold it into a single store or a BSWAP and a store if the targets
6482/// supports it.
6483///
6484/// Assuming little endian target:
6485/// i8 *p = ...
6486/// i32 val = ...
6487/// p[0] = (val >> 0) & 0xFF;
6488/// p[1] = (val >> 8) & 0xFF;
6489/// p[2] = (val >> 16) & 0xFF;
6490/// p[3] = (val >> 24) & 0xFF;
6491/// =>
6492/// *((i32)p) = val;
6493///
6494/// i8 *p = ...
6495/// i32 val = ...
6496/// p[0] = (val >> 24) & 0xFF;
6497/// p[1] = (val >> 16) & 0xFF;
6498/// p[2] = (val >> 8) & 0xFF;
6499/// p[3] = (val >> 0) & 0xFF;
6500/// =>
6501/// *((i32)p) = BSWAP(val);
6502SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6503 // Collect all the stores in the chain.
6504 SDValue Chain;
6506 for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6507 if (Store->getMemoryVT() != MVT::i8 ||
6508 Store->isVolatile() || Store->isIndexed())
6509 return SDValue();
6510 Stores.push_back(Store);
6511 Chain = Store->getChain();
6512 }
6513 // Handle the simple type only.
6514 unsigned Width = Stores.size();
6516 *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6517 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6518 return SDValue();
6519
6520 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6521 if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6522 return SDValue();
6523
6524 // Check if all the bytes of the combined value we are looking at are stored
6525 // to the same base address. Collect bytes offsets from Base address into
6526 // ByteOffsets.
6527 SDValue CombinedValue;
6528 SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
6529 int64_t FirstOffset = INT64_MAX;
6530 StoreSDNode *FirstStore = nullptr;
6532 for (auto Store : Stores) {
6533 // All the stores store different byte of the CombinedValue. A truncate is
6534 // required to get that byte value.
6535 SDValue Trunc = Store->getValue();
6536 if (Trunc.getOpcode() != ISD::TRUNCATE)
6537 return SDValue();
6538 // A shift operation is required to get the right byte offset, except the
6539 // first byte.
6540 int64_t Offset = 0;
6541 SDValue Value = Trunc.getOperand(0);
6542 if (Value.getOpcode() == ISD::SRL ||
6543 Value.getOpcode() == ISD::SRA) {
6544 ConstantSDNode *ShiftOffset =
6545 dyn_cast<ConstantSDNode>(Value.getOperand(1));
6546 // Trying to match the following pattern. The shift offset must be
6547 // a constant and a multiple of 8. It is the byte offset in "y".
6548 //
6549 // x = srl y, offset
6550 // i8 z = trunc x
6551 // store z, ...
6552 if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6553 return SDValue();
6554
6555 Offset = ShiftOffset->getSExtValue()/8;
6556 Value = Value.getOperand(0);
6557 }
6558
6559 // Stores must share the same combined value with different offsets.
6560 if (!CombinedValue)
6561 CombinedValue = Value;
6562 else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6563 return SDValue();
6564
6565 // The trunc and all the extend operation should be stripped to get the
6566 // real value we are stored.
6567 else if (CombinedValue.getValueType() != VT) {
6568 if (Value.getValueType() == VT ||
6569 Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6570 CombinedValue = Value;
6571 // Give up if the combined value type is smaller than the store size.
6572 if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6573 return SDValue();
6574 }
6575
6576 // Stores must share the same base address
6577 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6578 int64_t ByteOffsetFromBase = 0;
6579 if (!Base)
6580 Base = Ptr;
6581 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6582 return SDValue();
6583
6584 // Remember the first byte store
6585 if (ByteOffsetFromBase < FirstOffset) {
6586 FirstStore = Store;
6587 FirstOffset = ByteOffsetFromBase;
6588 }
6589 // Map the offset in the store and the offset in the combined value, and
6590 // early return if it has been set before.
6591 if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6592 return SDValue();
6593 ByteOffsets[Offset] = ByteOffsetFromBase;
6594 }
6595
6596 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6597 assert(FirstStore && "First store must be set");
6598
6599 // Check if the bytes of the combined value we are looking at match with
6600 // either big or little endian value store.
6601 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6602 if (!IsBigEndian.hasValue())
6603 return SDValue();
6604
6605 // The node we are looking at matches with the pattern, check if we can
6606 // replace it with a single bswap if needed and store.
6607
6608 // If the store needs byte swap check if the target supports it
6609 bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6610
6611 // Before legalize we can introduce illegal bswaps which will be later
6612 // converted to an explicit bswap sequence. This way we end up with a single
6613 // store and byte shuffling instead of several stores and byte shuffling.
6614 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6615 return SDValue();
6616
6617 // Check that a store of the wide type is both allowed and fast on the target
6618 bool Fast = false;
6619 bool Allowed =
6620 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6621 *FirstStore->getMemOperand(), &Fast);
6622 if (!Allowed || !Fast)
6623 return SDValue();
6624
6625 if (VT != CombinedValue.getValueType()) {
6626 assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6627 "Get unexpected store value to combine");
6628 CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6629 CombinedValue);
6630 }
6631
6632 if (NeedsBswap)
6633 CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6634
6635 SDValue NewStore =
6636 DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6637 FirstStore->getPointerInfo(), FirstStore->getAlignment());
6638
6639 // Rely on other DAG combine rules to remove the other individual stores.
6640 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6641 return NewStore;
6642}
6643
6644/// Match a pattern where a wide type scalar value is loaded by several narrow
6645/// loads and combined by shifts and ors. Fold it into a single load or a load
6646/// and a BSWAP if the targets supports it.
6647///
6648/// Assuming little endian target:
6649/// i8 *a = ...
6650/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6651/// =>
6652/// i32 val = *((i32)a)
6653///
6654/// i8 *a = ...
6655/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6656/// =>
6657/// i32 val = BSWAP(*((i32)a))
6658///
6659/// TODO: This rule matches complex patterns with OR node roots and doesn't
6660/// interact well with the worklist mechanism. When a part of the pattern is
6661/// updated (e.g. one of the loads) its direct users are put into the worklist,
6662/// but the root node of the pattern which triggers the load combine is not
6663/// necessarily a direct user of the changed node. For example, once the address
6664/// of t28 load is reassociated load combine won't be triggered:
6665/// t25: i32 = add t4, Constant:i32<2>
6666/// t26: i64 = sign_extend t25
6667/// t27: i64 = add t2, t26
6668/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6669/// t29: i32 = zero_extend t28
6670/// t32: i32 = shl t29, Constant:i8<8>
6671/// t33: i32 = or t23, t32
6672/// As a possible fix visitLoad can check if the load can be a part of a load
6673/// combine pattern and add corresponding OR roots to the worklist.
6674SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6675 assert(N->getOpcode() == ISD::OR &&
6676 "Can only match load combining against OR nodes");
6677
6678 // Handles simple types only
6679 EVT VT = N->getValueType(0);
6680 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6681 return SDValue();
6682 unsigned ByteWidth = VT.getSizeInBits() / 8;
6683
6684 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6685 // Before legalize we can introduce too wide illegal loads which will be later
6686 // split into legal sized loads. This enables us to combine i64 load by i8
6687 // patterns to a couple of i32 loads on 32 bit targets.
6688 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6689 return SDValue();
6690
6691 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6692 auto MemoryByteOffset = [&] (ByteProvider P) {
6693 assert(P.isMemory() && "Must be a memory byte provider");
6694 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6695 assert(LoadBitWidth % 8 == 0 &&
6696 "can only analyze providers for individual bytes not bit");
6697 unsigned LoadByteWidth = LoadBitWidth / 8;
6698 return IsBigEndianTarget
6699 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6700 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6701 };
6702
6704 SDValue Chain;
6705
6707 Optional<ByteProvider> FirstByteProvider;
6708 int64_t FirstOffset = INT64_MAX;
6709
6710 // Check if all the bytes of the OR we are looking at are loaded from the same
6711 // base address. Collect bytes offsets from Base address in ByteOffsets.
6712 SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6713 for (unsigned i = 0; i < ByteWidth; i++) {
6714 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6715 if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6716 return SDValue();
6717
6718 LoadSDNode *L = P->Load;
6719 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6720 "Must be enforced by calculateByteProvider");
6721 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6722
6723 // All loads must share the same chain
6724 SDValue LChain = L->getChain();
6725 if (!Chain)
6726 Chain = LChain;
6727 else if (Chain != LChain)
6728 return SDValue();
6729
6730 // Loads must share the same base address
6732 int64_t ByteOffsetFromBase = 0;
6733 if (!Base)
6734 Base = Ptr;
6735 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6736 return SDValue();
6737
6738 // Calculate the offset of the current byte from the base address
6739 ByteOffsetFromBase += MemoryByteOffset(*P);
6740 ByteOffsets[i] = ByteOffsetFromBase;
6741
6742 // Remember the first byte load
6743 if (ByteOffsetFromBase < FirstOffset) {
6744 FirstByteProvider = P;
6745 FirstOffset = ByteOffsetFromBase;
6746 }
6747
6748 Loads.insert(L);
6749 }
6750 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6751 "memory, so there must be at least one load which produces the value");
6752 assert(Base && "Base address of the accessed memory location must be set");
6753 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6754
6755 // Check if the bytes of the OR we are looking at match with either big or
6756 // little endian value load
6757 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6758 if (!IsBigEndian.hasValue())
6759 return SDValue();
6760
6761 assert(FirstByteProvider && "must be set");
6762
6763 // Ensure that the first byte is loaded from zero offset of the first load.
6764 // So the combined value can be loaded from the first load address.
6765 if (MemoryByteOffset(*FirstByteProvider) != 0)
6766 return SDValue();
6767 LoadSDNode *FirstLoad = FirstByteProvider->Load;
6768
6769 // The node we are looking at matches with the pattern, check if we can
6770 // replace it with a single load and bswap if needed.
6771
6772 // If the load needs byte swap check if the target supports it
6773 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6774
6775 // Before legalize we can introduce illegal bswaps which will be later
6776 // converted to an explicit bswap sequence. This way we end up with a single
6777 // load and byte shuffling instead of several loads and byte shuffling.
6778 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6779 return SDValue();
6780
6781 // Check that a load of the wide type is both allowed and fast on the target
6782 bool Fast = false;
6783 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6784 VT, *FirstLoad->getMemOperand(), &Fast);
6785 if (!Allowed || !Fast)
6786 return SDValue();
6787
6788 SDValue NewLoad =
6789 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6790 FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6791
6792 // Transfer chain users from old loads to the new load.
6793 for (LoadSDNode *L : Loads)
6794 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6795
6796 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6797}
6798
6799// If the target has andn, bsl, or a similar bit-select instruction,
6800// we want to unfold masked merge, with canonical pattern of:
6801// | A | |B|
6802// ((x ^ y) & m) ^ y
6803// | D |
6804// Into:
6805// (x & m) | (y & ~m)
6806// If y is a constant, and the 'andn' does not work with immediates,
6807// we unfold into a different pattern:
6808// ~(~x & m) & (m | y)
6809// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6810// the very least that breaks andnpd / andnps patterns, and because those
6811// patterns are simplified in IR and shouldn't be created in the DAG
6812SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6813 assert(N->getOpcode() == ISD::XOR);
6814
6815 // Don't touch 'not' (i.e. where y = -1).
6816 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6817 return SDValue();
6818
6819 EVT VT = N->getValueType(0);
6820
6821 // There are 3 commutable operators in the pattern,
6822 // so we have to deal with 8 possible variants of the basic pattern.
6823 SDValue X, Y, M;
6824 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6825 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6826 return false;
6827 SDValue Xor = And.getOperand(XorIdx);
6828 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6829 return false;
6830 SDValue Xor0 = Xor.getOperand(0);
6831 SDValue Xor1 = Xor.getOperand(1);
6832 // Don't touch 'not' (i.e. where y = -1).
6833 if (isAllOnesOrAllOnesSplat(Xor1))
6834 return false;
6835 if (Other == Xor0)
6836 std::swap(Xor0, Xor1);
6837 if (Other != Xor1)
6838 return false;
6839 X = Xor0;
6840 Y = Xor1;
6841 M = And.getOperand(XorIdx ? 0 : 1);
6842 return true;
6843 };
6844
6845 SDValue N0 = N->getOperand(0);
6846 SDValue N1 = N->getOperand(1);
6847 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6848 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6849 return SDValue();
6850
6851 // Don't do anything if the mask is constant. This should not be reachable.
6852 // InstCombine should have already unfolded this pattern, and DAGCombiner
6853 // probably shouldn't produce it, too.
6854 if (isa<ConstantSDNode>(M.getNode()))
6855 return SDValue();
6856
6857 // We can transform if the target has AndNot
6858 if (!TLI.hasAndNot(M))
6859 return SDValue();
6860
6861 SDLoc DL(N);
6862
6863 // If Y is a constant, check that 'andn' works with immediates.
6864 if (!TLI.hasAndNot(Y)) {
6865 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6866 // If not, we need to do a bit more work to make sure andn is still used.
6867 SDValue NotX = DAG.getNOT(DL, X, VT);
6868 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6869 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6870 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6871 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6872 }
6873
6874 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6875 SDValue NotM = DAG.getNOT(DL, M, VT);
6876 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6877
6878 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6879}
6880
6881SDValue DAGCombiner::visitXOR(SDNode *N) {
6882 SDValue N0 = N->getOperand(0);
6883 SDValue N1 = N->getOperand(1);
6884 EVT VT = N0.getValueType();
6885
6886 // fold vector ops
6887 if (VT.isVector()) {
6888 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6889 return FoldedVOp;
6890
6891 // fold (xor x, 0) -> x, vector edition
6893 return N1;
6895 return N0;
6896 }
6897
6898 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6899 SDLoc DL(N);
6900 if (N0.isUndef() && N1.isUndef())
6901 return DAG.getConstant(0, DL, VT);
6902 // fold (xor x, undef) -> undef
6903 if (N0.isUndef())
6904 return N0;
6905 if (N1.isUndef())
6906 return N1;
6907 // fold (xor c1, c2) -> c1^c2
6910 if (N0C && N1C)
6911 return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6912 // canonicalize constant to RHS
6915 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6916 // fold (xor x, 0) -> x
6917 if (isNullConstant(N1))
6918 return N0;
6919
6920 if (SDValue NewSel = foldBinOpIntoSelect(N))
6921 return NewSel;
6922
6923 // reassociate xor
6924 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6925 return RXOR;
6926
6927 // fold !(x cc y) -> (x !cc y)
6928 unsigned N0Opcode = N0.getOpcode();
6929 SDValue LHS, RHS, CC;
6930 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6931 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6932 LHS.getValueType().isInteger());
6933 if (!LegalOperations ||
6934 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6935 switch (N0Opcode) {
6936 default:
6937 llvm_unreachable("Unhandled SetCC Equivalent!");
6938 case ISD::SETCC:
6939 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6940 case ISD::SELECT_CC:
6941 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6942 N0.getOperand(3), NotCC);
6943 }
6944 }
6945 }
6946
6947 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6948 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6949 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6950 SDValue V = N0.getOperand(0);
6951 SDLoc DL0(N0);
6952 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6953 DAG.getConstant(1, DL0, V.getValueType()));
6954 AddToWorklist(V.getNode());
6955 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6956 }
6957
6958 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6959 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6960 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6961 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6962 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6963 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6964 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6965 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6966 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6967 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6968 }
6969 }
6970 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6971 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6972 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6973 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6974 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6975 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6976 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6977 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6978 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6979 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6980 }
6981 }
6982
6983 // fold (not (neg x)) -> (add X, -1)
6984 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
6985 // Y is a constant or the subtract has a single use.
6986 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
6987 isNullConstant(N0.getOperand(0))) {
6988 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
6989 DAG.getAllOnesConstant(DL, VT));
6990 }
6991
6992 // fold (xor (and x, y), y) -> (and (not x), y)
6993 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6994 SDValue X = N0.getOperand(0);
6995 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6996 AddToWorklist(NotX.getNode());
6997 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6998 }
6999
7000 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7003 unsigned BitWidth = VT.getScalarSizeInBits();
7004 if (XorC && ShiftC) {
7005 // Don't crash on an oversized shift. We can not guarantee that a bogus
7006 // shift has been simplified to undef.
7007 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7008 if (ShiftAmt < BitWidth) {
7009 APInt Ones = APInt::getAllOnesValue(BitWidth);
7010 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7011 if (XorC->getAPIntValue() == Ones) {
7012 // If the xor constant is a shifted -1, do a 'not' before the shift:
7013 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7014 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7015 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7016 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7017 }
7018 }
7019 }
7020 }
7021
7022 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7023 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7024 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7025 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7026 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7027 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7028 SDValue S0 = S.getOperand(0);
7029 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7030 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7032 if (C->getAPIntValue() == (OpSizeInBits - 1))
7033 return DAG.getNode(ISD::ABS, DL, VT, S0);
7034 }
7035 }
7036 }
7037
7038 // fold (xor x, x) -> 0
7039 if (N0 == N1)
7040 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7041
7042 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7043 // Here is a concrete example of this equivalence:
7044 // i16 x == 14
7045 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7046 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7047 //
7048 // =>
7049 //
7050 // i16 ~1 == 0b1111111111111110
7051 // i16 rol(~1, 14) == 0b1011111111111111
7052 //
7053 // Some additional tips to help conceptualize this transform:
7054 // - Try to see the operation as placing a single zero in a value of all ones.
7055 // - There exists no value for x which would allow the result to contain zero.
7056 // - Values of x larger than the bitwidth are undefined and do not require a
7057 // consistent result.
7058 // - Pushing the zero left requires shifting one bits in from the right.
7059 // A rotate left of ~1 is a nice way of achieving the desired result.
7060 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7062 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7063 N0.getOperand(1));
7064 }
7065
7066 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7067 if (N0Opcode == N1.getOpcode())
7068 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7069 return V;
7070
7071 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7072 if (SDValue MM = unfoldMaskedMerge(N))
7073 return MM;
7074
7075 // Simplify the expression using non-local knowledge.
7077 return SDValue(N, 0);
7078
7079 return SDValue();
7080}
7081
7082/// Handle transforms common to the three shifts, when the shift amount is a
7083/// constant.
7084/// We are looking for: (shift being one of shl/sra/srl)
7085/// shift (binop X, C0), C1
7086/// And want to transform into:
7087/// binop (shift X, C1), (shift C0, C1)
///
/// N is the shift node; the lines visible here read the shift amount through
/// N->getOperand(1) rather than through Amt. Returns the rebuilt binop, or an
/// empty SDValue when any of the guards below rejects the pattern.
7088SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
7089 // Do not turn a 'not' into a regular xor.
7090 if (isBitwiseNot(N->getOperand(0)))
7091 return SDValue();
7092
7093 // The inner binop must be one-use, since we want to replace it.
7094 SDNode *LHS = N->getOperand(0).getNode();
7095 if (!LHS->hasOneUse()) return SDValue();
7096
7097 // We want to pull some binops through shifts, so that we have (and (shift))
7098 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7099 // thing happens with address calculations, so it's important to canonicalize
7100 // it.
     // Accepted binops: or/xor/and under any of the shifts, add only under shl.
7101 switch (LHS->getOpcode()) {
7102 default:
7103 return SDValue();
7104 case ISD::OR:
7105 case ISD::XOR:
7106 case ISD::AND:
7107 break;
7108 case ISD::ADD:
7109 if (N->getOpcode() != ISD::SHL)
7110 return SDValue(); // only shl(add) not sr[al](add).
7111 break;
7112 }
7113
7114 // We require the RHS of the binop to be a constant and not opaque as well.
     // NOTE(review): listing line 7115, which defines BinOpCst, is not visible
     // in this chunk; presumably it is derived from LHS->getOperand(1).
7116 if (!BinOpCst)
7117 return SDValue();
7118
7119 // FIXME: disable this unless the input to the binop is a shift by a constant
7120 // or is copy/select. Enable this in other cases when figure out it's exactly
7121 // profitable.
7122 SDValue BinOpLHSVal = LHS->getOperand(0);
7123 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7124 BinOpLHSVal.getOpcode() == ISD::SRA ||
7125 BinOpLHSVal.getOpcode() == ISD::SRL) &&
7126 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7127 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7128 BinOpLHSVal.getOpcode() == ISD::SELECT;
7129
7130 if (!IsShiftByConstant && !IsCopyOrSelect)
7131 return SDValue();
7132
     // For a copy/select input, give up when the shift itself has exactly one
     // use.
7133 if (IsCopyOrSelect && N->hasOneUse())
7134 return SDValue();
7135
7136 EVT VT = N->getValueType(0);
7137
     // Let the target veto moving the shift inside the binop.
7138 if (!TLI.isDesirableToCommuteWithShift(N, Level))
7139 return SDValue();
7140
7141 // Fold the constants, shifting the binop RHS by the shift amount.
7142 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
7143 N->getValueType(0),
7144 LHS->getOperand(1), N->getOperand(1));
     // Both operands are constants, so getNode is expected to hand back a
     // folded constant rather than a new shift node.
7145 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7146
7147 // Create the new shift.
7148 SDValue NewShift = DAG.getNode(N->getOpcode(),
7149 SDLoc(LHS->getOperand(0)),
7150 VT, LHS->getOperand(0), N->getOperand(1));
7151
7152 // Create the new binop.
7153 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
7154}
7155
/// Push a truncate through an AND with a constant mask:
/// (truncate (and N00, N01C)) -> (and (truncate N00), (truncate N01C)).
///
/// N must be a TRUNCATE whose operand is an AND (both are asserted). Returns
/// the new AND in the truncated type, or an empty SDValue when the one-use,
/// type-desirability or constant-operand checks fail.
7156SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7157 assert(N->getOpcode() == ISD::TRUNCATE);
7158 assert(N->getOperand(0).getOpcode() == ISD::AND);
7159
7160 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7161 EVT TruncVT = N->getValueType(0);
     // Only rewrite when both the truncate and the AND have a single use and
     // the target considers an AND in the narrow type desirable.
7162 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7163 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7164 SDValue N01 = N->getOperand(0).getOperand(1);
     // The mask must be a non-opaque constant (scalar or constant vector).
7165 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7166 SDLoc DL(N);
7167 SDValue N00 = N->getOperand(0).getOperand(0);
7168 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7169 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
     // Queue both new truncates so the combiner revisits them.
7170 AddToWorklist(Trunc00.getNode());
7171 AddToWorklist(Trunc01.getNode());
7172 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7173 }
7174 }
7175
7176 return SDValue();
7177}
7178
/// Combine a rotate node. The result is always rebuilt with N->getOpcode(),
/// and the nested-rotate fold below checks for ISD::ROTL / ISD::ROTR.
///
/// Returns a replacement value when one of the folds below applies, or an
/// empty SDValue otherwise.
7179SDValue DAGCombiner::visitRotate(SDNode *N) {
7180 SDLoc dl(N);
7181 SDValue N0 = N->getOperand(0);
7182 SDValue N1 = N->getOperand(1);
7183 EVT VT = N->getValueType(0);
7184 unsigned Bitsize = VT.getScalarSizeInBits();
7185
7186 // fold (rot x, 0) -> x
7187 if (isNullOrNullSplat(N1))
7188 return N0;
7189
7190 // fold (rot x, c) -> x iff (c % BitSize) == 0
     // For a power-of-two width this is checked by asking whether the low
     // log2(Bitsize) bits of the amount are known to be zero.
7191 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7192 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7193 if (DAG.MaskedValueIsZero(N1, ModuloMask))
7194 return N0;
7195 }
7196
7197 // fold (rot x, c) -> (rot x, c % BitSize)
7198 // TODO - support non-uniform vector amounts.
7199 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
7200 if (Cst->getAPIntValue().uge(Bitsize)) {
7201 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
7202 return DAG.getNode(N->getOpcode(), dl, VT, N0,
7203 DAG.getConstant(RotAmt, dl, N1.getValueType()));
7204 }
7205 }
7206
7207 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7208 if (N1.getOpcode() == ISD::TRUNCATE &&
7209 N1.getOperand(0).getOpcode() == ISD::AND) {
7210 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7211 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7212 }
7213
7214 unsigned NextOp = N0.getOpcode();
7215 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7216 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
     // NOTE(review): listing lines 7217-7218, which declare C1 and C2, are not
     // visible in this chunk; per the fold comment above they are presumably
     // the outer and inner rotate amounts.
7219 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7220 EVT ShiftVT = C1->getValueType(0);
     // Rotates in the same direction combine with ADD, opposite directions
     // with SUB.
7221 bool SameSide = (N->getOpcode() == NextOp);
7222 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7223 if (SDValue CombinedShift =
7224 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
     // Reduce the combined amount with a signed remainder by the bit width.
7225 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7226 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7227 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
7228 BitsizeC.getNode());
7229 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7230 CombinedShiftNorm);
7231 }
7232 }
7233 }
7234 return SDValue();
7235}
7236
/// Combine an ISD::SHL node.
///
/// Tries, in order, the generic shift simplification, vector folds, constant
/// folding, and a series of pattern-specific rewrites. Returns the replacement
/// value, SDValue(N, 0) when N was simplified in place by
/// SimplifyDemandedBits, or an empty SDValue when nothing applied.
7237SDValue DAGCombiner::visitSHL(SDNode *N) {
7238 SDValue N0 = N->getOperand(0);
7239 SDValue N1 = N->getOperand(1);
7240 if (SDValue V = DAG.simplifyShift(N0, N1))
7241 return V;
7242
7243 EVT VT = N0.getValueType();
7244 EVT ShiftVT = N1.getValueType();
7245 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7246
7247 // fold vector ops
7248 if (VT.isVector()) {
7249 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7250 return FoldedVOp;
7251
7252 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7253 // If setcc produces all-one true value then:
7254 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7255 if (N1CV && N1CV->isConstant()) {
7256 if (N0.getOpcode() == ISD::AND) {
7257 SDValue N00 = N0->getOperand(0);
7258 SDValue N01 = N0->getOperand(1);
7259 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7260
     // NOTE(review): listing lines 7262-7264 are not visible in this chunk;
     // `C` used below is presumably defined in the elided part of this
     // condition.
7261 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7265 N01CV, N1CV))
7266 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7267 }
7268 }
7269 }
7270 }
7271
7273
7274 // fold (shl c1, c2) -> c1<<c2
7275 // TODO - support non-uniform vector shift amounts.
     // NOTE(review): N0C and N1C are used from here on, but their definitions
     // (listing lines 7272 and 7276) are not visible in this chunk.
7277 if (N0C && N1C && !N1C->isOpaque())
7278 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
7279
7280 if (SDValue NewSel = foldBinOpIntoSelect(N))
7281 return NewSel;
7282
7283 // if (shl x, c) is known to be zero, return 0
7284 if (DAG.MaskedValueIsZero(SDValue(N, 0),
7285 APInt::getAllOnesValue(OpSizeInBits)))
7286 return DAG.getConstant(0, SDLoc(N), VT);
7287
7288 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7289 if (N1.getOpcode() == ISD::TRUNCATE &&
7290 N1.getOperand(0).getOpcode() == ISD::AND) {
7291 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7292 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7293 }
7294
7295 // TODO - support non-uniform vector shift amounts.
     // Returning SDValue(N, 0) reports that N itself was updated in place.
7296 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7297 return SDValue(N, 0);
7298
7299 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7300 if (N0.getOpcode() == ISD::SHL) {
     // Both predicates widen the amounts by one extra bit before adding so
     // that the sum cannot wrap.
7301 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7302 ConstantSDNode *RHS) {
7303 APInt c1 = LHS->getAPIntValue();
7304 APInt c2 = RHS->getAPIntValue();
7305 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7306 return (c1 + c2).uge(OpSizeInBits);
7307 };
7308 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7309 return DAG.getConstant(0, SDLoc(N), VT);
7310
7311 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7312 ConstantSDNode *RHS) {
7313 APInt c1 = LHS->getAPIntValue();
7314 APInt c2 = RHS->getAPIntValue();
7315 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7316 return (c1 + c2).ult(OpSizeInBits);
7317 };
7318 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7319 SDLoc DL(N);
7320 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7321 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7322 }
7323 }
7324
7325 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7326 // For this to be valid, the second form must not preserve any of the bits
7327 // that are shifted out by the inner shift in the first form. This means
7328 // the outer shift size must be >= the number of bits added by the ext.
7329 // As a corollary, we don't care what kind of ext it is.
7330 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7331 N0.getOpcode() == ISD::ANY_EXTEND ||
7332 N0.getOpcode() == ISD::SIGN_EXTEND) &&
7333 N0.getOperand(0).getOpcode() == ISD::SHL) {
7334 SDValue N0Op0 = N0.getOperand(0);
7335 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7336 EVT InnerVT = N0Op0.getValueType();
7337 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7338
     // In both lambdas c1 is the inner amount and c2 the outer amount; the
     // first clause checks that c2 covers the bits added by the extend.
7339 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7340 ConstantSDNode *RHS) {
7341 APInt c1 = LHS->getAPIntValue();
7342 APInt c2 = RHS->getAPIntValue();
7343 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7344 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7345 (c1 + c2).uge(OpSizeInBits);
7346 };
7347 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7348 /*AllowUndefs*/ false,
7349 /*AllowTypeMismatch*/ true))
7350 return DAG.getConstant(0, SDLoc(N), VT);
7351
7352 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7353 ConstantSDNode *RHS) {
7354 APInt c1 = LHS->getAPIntValue();
7355 APInt c2 = RHS->getAPIntValue();
7356 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7357 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7358 (c1 + c2).ult(OpSizeInBits);
7359 };
7360 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7361 /*AllowUndefs*/ false,
7362 /*AllowTypeMismatch*/ true)) {
7363 SDLoc DL(N);
7364 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
     // Bring the inner amount to the outer shift-amount type before adding.
7365 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7366 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7367 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7368 }
7369 }
7370
7371 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7372 // Only fold this if the inner zext has no other uses to avoid increasing
7373 // the total number of instructions.
7374 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7375 N0.getOperand(0).getOpcode() == ISD::SRL) {
7376 SDValue N0Op0 = N0.getOperand(0);
7377 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7378
     // The two amounts must be equal and smaller than the scalar width of VT.
7379 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7380 APInt c1 = LHS->getAPIntValue();
7381 APInt c2 = RHS->getAPIntValue();
7382 zeroExtendToMatch(c1, c2);
7383 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7384 };
7385 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7386 /*AllowUndefs*/ false,
7387 /*AllowTypeMismatch*/ true)) {
7388 SDLoc DL(N);
7389 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7390 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7391 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7392 AddToWorklist(NewSHL.getNode());
7393 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7394 }
7395 }
7396
7397 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
7398 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
7399 // TODO - support non-uniform vector shift amounts.
7400 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7401 N0->getFlags().hasExact()) {
7402 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7403 uint64_t C1 = N0C1->getZExtValue();
7404 uint64_t C2 = N1C->getZExtValue();
7405 SDLoc DL(N);
7406 if (C1 <= C2)
7407 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7408 DAG.getConstant(C2 - C1, DL, ShiftVT));
     // C1 > C2: keep the original right-shift opcode with amount C1 - C2.
7409 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7410 DAG.getConstant(C1 - C2, DL, ShiftVT));
7411 }
7412 }
7413
7414 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
7415 // (and (srl x, (sub c1, c2)), MASK)
7416 // Only fold this if the inner shift has no other uses -- if it does, folding
7417 // this will increase the total number of instructions.
7418 // TODO - drop hasOneUse requirement if c1 == c2?
7419 // TODO - support non-uniform vector shift amounts.
     // NOTE(review): listing line 7421, the end of this condition, is not
     // visible in this chunk.
7420 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7422 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7423 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7424 uint64_t c1 = N0C1->getZExtValue();
7425 uint64_t c2 = N1C->getZExtValue();
     // Start with the high (OpSizeInBits - c1) bits set, then move the mask
     // by the net shift so it matches the bits that survive both shifts.
7426 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7427 SDValue Shift;
7428 if (c2 > c1) {
7429 Mask <<= c2 - c1;
7430 SDLoc DL(N);
7431 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7432 DAG.getConstant(c2 - c1, DL, ShiftVT));
7433 } else {
7434 Mask.lshrInPlace(c1 - c2);
7435 SDLoc DL(N);
7436 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7437 DAG.getConstant(c1 - c2, DL, ShiftVT));
7438 }
7439 SDLoc DL(N0);
7440 return DAG.getNode(ISD::AND, DL, VT, Shift,
7441 DAG.getConstant(Mask, DL, VT));
7442 }
7443 }
7444 }
7445
7446 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7447 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7448 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7449 SDLoc DL(N);
7450 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7451 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7452 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7453 }
7454
7455 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7456 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7457 // Variant of version done on multiply, except mul by a power of 2 is turned
7458 // into a shift.
7459 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7460 N0.getNode()->hasOneUse() &&
7461 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7462 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7463 TLI.isDesirableToCommuteWithShift(N, Level)) {
7464 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7465 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7466 AddToWorklist(Shl0.getNode());
7467 AddToWorklist(Shl1.getNode());
7468 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7469 }
7470
7471 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7472 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7473 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7474 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7475 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
     // NOTE(review): listing line 7476 is not visible in this chunk.
7477 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7478 }
7479
     // Last resort: the generic shift-by-constant transform shared by all
     // three shifts.
7480 if (N1C && !N1C->isOpaque())
7481 if (SDValue NewSHL = visitShiftByConstant(N, N1C))
7482 return NewSHL;
7483
7484 return SDValue();
7485}
7486
/// Combine an ISD::SRA (arithmetic shift right) node.
///
/// Returns the replacement value, SDValue(N, 0) when N was simplified in
/// place by SimplifyDemandedBits, or an empty SDValue when no fold applied.
7487SDValue DAGCombiner::visitSRA(SDNode *N) {
7488 SDValue N0 = N->getOperand(0);
7489 SDValue N1 = N->getOperand(1);
7490 if (SDValue V = DAG.simplifyShift(N0, N1))
7491 return V;
7492
7493 EVT VT = N0.getValueType();
7494 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7495
7496 // Arithmetic shifting an all-sign-bit value is a no-op.
7497 // fold (sra 0, x) -> 0
7498 // fold (sra -1, x) -> -1
7499 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7500 return N0;
7501
7502 // fold vector ops
7503 if (VT.isVector())
7504 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7505 return FoldedVOp;
7506
7508
7509 // fold (sra c1, c2) -> c1 >>s c2
7510 // TODO - support non-uniform vector shift amounts.
     // NOTE(review): N0C and N1C are used from here on, but their definitions
     // (listing lines 7507 and 7511) are not visible in this chunk.
7512 if (N0C && N1C && !N1C->isOpaque())
7513 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7514
7515 if (SDValue NewSel = foldBinOpIntoSelect(N))
7516 return NewSel;
7517
7518 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
7519 // sext_inreg.
7520 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
     // LowBits is the number of low bits that survive the shl/sra pair; ExtVT
     // is the integer (or vector-of-integer) type of that width.
7521 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7522 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7523 if (VT.isVector())
7524 ExtVT = EVT::getVectorVT(*DAG.getContext(),
7525 ExtVT, VT.getVectorNumElements());
     // NOTE(review): listing line 7527, the rest of this condition, is not
     // visible in this chunk.
7526 if ((!LegalOperations ||
7528 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7529 N0.getOperand(0), DAG.getValueType(ExtVT));
7530 }
7531
7532 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7533 // clamp (add c1, c2) to max shift.
7534 if (N0.getOpcode() == ISD::SRA) {
7535 SDLoc DL(N);
7536 EVT ShiftVT = N1.getValueType();
7537 EVT ShiftSVT = ShiftVT.getScalarType();
7538 SmallVector<SDValue, 16> ShiftValues;
7539
     // Used as the matchBinaryPredicate callback: it always returns true and,
     // as a side effect, records the clamped sum for each constant pair.
7540 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7541 APInt c1 = LHS->getAPIntValue();
7542 APInt c2 = RHS->getAPIntValue();
7543 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7544 APInt Sum = c1 + c2;
7545 unsigned ShiftSum =
7546 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7547 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7548 return true;
7549 };
7550 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7551 SDValue ShiftValue;
7552 if (VT.isVector())
7553 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7554 else
7555 ShiftValue = ShiftValues[0];
7556 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7557 }
7558 }
7559
7560 // fold (sra (shl X, m), (sub result_size, n))
7561 // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
7562 // result_size - n != m.
7563 // If truncate is free for the target sext(shl) is likely to result in better
7564 // code.
7565 if (N0.getOpcode() == ISD::SHL && N1C) {
7566 // Get the two constants of the shifts, CN0 = m, CN = n.
7567 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7568 if (N01C) {
7569 LLVMContext &Ctx = *DAG.getContext();
7570 // Determine what the truncate's result bitsize and type would be.
7571 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7572
7573 if (VT.isVector())
7574 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7575
7576 // Determine the residual right-shift amount.
7577 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7578
7579 // If the shift is not a no-op (in which case this should be just a sign
7580 // extend already), the truncated to type is legal, sign_extend is legal
7581 // on that type, and the truncate to that type is both legal and free,
7582 // perform the transform.
     // NOTE(review): listing lines 7584-7585 (part of this condition) and
     // 7589 (the end of the getConstant call below) are not visible in this
     // chunk.
7583 if ((ShiftAmt > 0) &&
7586 TLI.isTruncateFree(VT, TruncVT)) {
7587 SDLoc DL(N);
7588 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7590 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7591 N0.getOperand(0), Amt);
7592 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7593 Shift);
7594 return DAG.getNode(ISD::SIGN_EXTEND, DL,
7595 N->getValueType(0), Trunc);
7596 }
7597 }
7598 }
7599
7600 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7601 if (N1.getOpcode() == ISD::TRUNCATE &&
7602 N1.getOperand(0).getOpcode() == ISD::AND) {
7603 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7604 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7605 }
7606
7607 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7608 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7609 // if c1 is equal to the number of bits the trunc removes
7610 // TODO - support non-uniform vector shift amounts.
7611 if (N0.getOpcode() == ISD::TRUNCATE &&
7612 (N0.getOperand(0).getOpcode() == ISD::SRL ||
7613 N0.getOperand(0).getOpcode() == ISD::SRA) &&
7614 N0.getOperand(0).hasOneUse() &&
7615 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7616 SDValue N0Op0 = N0.getOperand(0);
7617 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7618 EVT LargeVT = N0Op0.getValueType();
     // TruncBits is how many bits the truncate drops from the wide type.
7619 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7620 if (LargeShift->getAPIntValue() == TruncBits) {
7621 SDLoc DL(N);
7622 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7623 getShiftAmountTy(LargeVT));
7624 SDValue SRA =
7625 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7626 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7627 }
7628 }
7629 }
7630
7631 // Simplify, based on bits shifted out of the LHS.
7632 // TODO - support non-uniform vector shift amounts.
7633 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7634 return SDValue(N, 0);
7635
7636 // If the sign bit is known to be zero, switch this to a SRL.
7637 if (DAG.SignBitIsZero(N0))
7638 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7639
     // Last resort: the generic shift-by-constant transform shared by all
     // three shifts.
7640 if (N1C && !N1C->isOpaque())
7641 if (SDValue NewSRA = visitShiftByConstant(N, N1C))
7642 return NewSRA;
7643
7644 return SDValue();
7645}
7646
/// Combine an ISD::SRL (logical shift right) node.
///
/// Returns the replacement value, SDValue(N, 0) when N was simplified in
/// place by SimplifyDemandedBits, or an empty SDValue when no fold applied.
/// Even when it returns an empty SDValue it may queue a BRCOND user of N for
/// revisiting (see the end of the function).
7647SDValue DAGCombiner::visitSRL(SDNode *N) {
7648 SDValue N0 = N->getOperand(0);
7649 SDValue N1 = N->getOperand(1);
7650 if (SDValue V = DAG.simplifyShift(N0, N1))
7651 return V;
7652
7653 EVT VT = N0.getValueType();
7654 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7655
7656 // fold vector ops
7657 if (VT.isVector())
7658 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7659 return FoldedVOp;
7660
7662
7663 // fold (srl c1, c2) -> c1 >>u c2
7664 // TODO - support non-uniform vector shift amounts.
     // NOTE(review): N0C and N1C are used from here on, but their definitions
     // (listing lines 7661 and 7665) are not visible in this chunk.
7666 if (N0C && N1C && !N1C->isOpaque())
7667 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7668
7669 if (SDValue NewSel = foldBinOpIntoSelect(N))
7670 return NewSel;
7671
7672 // if (srl x, c) is known to be zero, return 0
7673 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7674 APInt::getAllOnesValue(OpSizeInBits)))
7675 return DAG.getConstant(0, SDLoc(N), VT);
7676
7677 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7678 if (N0.getOpcode() == ISD::SRL) {
     // Both predicates widen the amounts by one extra bit before adding so
     // that the sum cannot wrap.
7679 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7680 ConstantSDNode *RHS) {
7681 APInt c1 = LHS->getAPIntValue();
7682 APInt c2 = RHS->getAPIntValue();
7683 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7684 return (c1 + c2).uge(OpSizeInBits);
7685 };
7686 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7687 return DAG.getConstant(0, SDLoc(N), VT);
7688
7689 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7690 ConstantSDNode *RHS) {
7691 APInt c1 = LHS->getAPIntValue();
7692 APInt c2 = RHS->getAPIntValue();
7693 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7694 return (c1 + c2).ult(OpSizeInBits);
7695 };
7696 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7697 SDLoc DL(N);
7698 EVT ShiftVT = N1.getValueType();
7699 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7700 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7701 }
7702 }
7703
7704 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7705 // TODO - support non-uniform vector shift amounts.
7706 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7707 N0.getOperand(0).getOpcode() == ISD::SRL) {
7708 if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7709 uint64_t c1 = N001C->getZExtValue();
7710 uint64_t c2 = N1C->getZExtValue();
7711 EVT InnerShiftVT = N0.getOperand(0).getValueType();
7712 EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7713 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7714 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7715 if (c1 + OpSizeInBits == InnerShiftSize) {
7716 SDLoc DL(N0);
     // A combined amount that reaches the wide type's width shifts every bit
     // out.
7717 if (c1 + c2 >= InnerShiftSize)
7718 return DAG.getConstant(0, DL, VT);
7719 return DAG.getNode(ISD::TRUNCATE, DL, VT,
7720 DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7721 N0.getOperand(0).getOperand(0),
7722 DAG.getConstant(c1 + c2, DL,
7723 ShiftCountVT)));
7724 }
7725 }
7726 }
7727
7728 // fold (srl (shl x, c), c) -> (and x, cst2)
7729 // TODO - (srl (shl x, c1), c2).
7730 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7731 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7732 SDLoc DL(N);
     // cst2 is built as (srl all-ones, c).
7733 SDValue Mask =
7734 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7735 AddToWorklist(Mask.getNode());
7736 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7737 }
7738
7739 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7740 // TODO - support non-uniform vector shift amounts.
7741 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7742 // Shifting in all undef bits?
7743 EVT SmallVT = N0.getOperand(0).getValueType();
7744 unsigned BitSize = SmallVT.getScalarSizeInBits();
7745 if (N1C->getAPIntValue().uge(BitSize))
7746 return DAG.getUNDEF(VT);
7747
7748 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7749 uint64_t ShiftAmt = N1C->getZExtValue();
7750 SDLoc DL0(N0);
7751 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7752 N0.getOperand(0),
7753 DAG.getConstant(ShiftAmt, DL0,
7754 getShiftAmountTy(SmallVT)));
7755 AddToWorklist(SmallShift.getNode());
     // Keep only the low (OpSizeInBits - ShiftAmt) bits of the extended
     // result.
7756 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7757 SDLoc DL(N);
7758 return DAG.getNode(ISD::AND, DL, VT,
7759 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7760 DAG.getConstant(Mask, DL, VT));
7761 }
7762 }
7763
7764 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
7765 // bit, which is unmodified by sra.
7766 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
7767 if (N0.getOpcode() == ISD::SRA)
7768 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7769 }
7770
7771 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
     // More precisely, the code below returns constant 0, constant 1, or
     // (xor (srl x, ShAmt), 1), depending on the known bits of x.
7772 if (N1C && N0.getOpcode() == ISD::CTLZ &&
7773 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7774 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7775
7776 // If any of the input bits are KnownOne, then the input couldn't be all
7777 // zeros, thus the result of the srl will always be zero.
7778 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7779
7780 // If all of the bits input to the ctlz node are known to be zero, then
7781 // the result of the ctlz is "32" and the result of the shift is one.
7782 APInt UnknownBits = ~Known.Zero;
7783 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7784
7785 // Otherwise, check to see if there is exactly one bit input to the ctlz.
7786 if (UnknownBits.isPowerOf2()) {
7787 // Okay, we know that only the single bit specified by UnknownBits
7788 // could be set on input to the CTLZ node. If this bit is set, the SRL
7789 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7790 // to an SRL/XOR pair, which is likely to simplify more.
7791 unsigned ShAmt = UnknownBits.countTrailingZeros();
7792 SDValue Op = N0.getOperand(0);
7793
     // Move the one possibly-set bit down to bit 0; nothing to do if it is
     // already there.
7794 if (ShAmt) {
7795 SDLoc DL(N0);
7796 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7797 DAG.getConstant(ShAmt, DL,
7798 getShiftAmountTy(Op.getValueType())));
7799 AddToWorklist(Op.getNode());
7800 }
7801
7802 SDLoc DL(N);
7803 return DAG.getNode(ISD::XOR, DL, VT,
7804 Op, DAG.getConstant(1, DL, VT));
7805 }
7806 }
7807
7808 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7809 if (N1.getOpcode() == ISD::TRUNCATE &&
7810 N1.getOperand(0).getOpcode() == ISD::AND) {
7811 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7812 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7813 }
7814
7815 // fold operands of srl based on knowledge that the low bits are not
7816 // demanded.
7817 // TODO - support non-uniform vector shift amounts.
7818 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7819 return SDValue(N, 0);
7820
7821 if (N1C && !N1C->isOpaque())
7822 if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7823 return NewSRL;
7824
7825 // Attempt to convert a srl of a load into a narrower zero-extending load.
7826 if (SDValue NarrowLoad = ReduceLoadWidth(N))
7827 return NarrowLoad;
7828
7829 // Here is a common situation. We want to optimize:
7830 //
7831 // %a = ...
7832 // %b = and i32 %a, 2
7833 // %c = srl i32 %b, 1
7834 // brcond i32 %c ...
7835 //
7836 // into
7837 //
7838 // %a = ...
7839 // %b = and %a, 2
7840 // %c = setcc eq %b, 0
7841 // brcond %c ...
7842 //
7843 // However when after the source operand of SRL is optimized into AND, the SRL
7844 // itself may not be optimized further. Look for it and add the BRCOND into
7845 // the worklist.
7846 if (N->hasOneUse()) {
7847 SDNode *Use = *N->use_begin();
7848 if (Use->getOpcode() == ISD::BRCOND)
7849 AddToWorklist(Use);
7850 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7851 // Also look past the truncate.
7852 Use = *Use->use_begin();
7853 if (Use->getOpcode() == ISD::BRCOND)
7854 AddToWorklist(Use);
7855 }
7856 }
7857
7858 return SDValue();
7859}
7860
7861SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7862 EVT VT = N->getValueType(0);
7863 SDValue N0 = N->getOperand(0);
7864 SDValue N1 = N->getOperand(1);
7865 SDValue N2 = N->getOperand(2);
7866 bool IsFSHL = N->getOpcode() == ISD::FSHL;
7867 unsigned BitWidth = VT.getScalarSizeInBits();
7868
7869 // fold (fshl N0, N1, 0) -> N0
7870 // fold (fshr N0, N1, 0) -> N1
7871 if (isPowerOf2_32(BitWidth))
7872 if (DAG.MaskedValueIsZero(
7873 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7874 return IsFSHL ? N0 : N1;
7875
7876 auto IsUndefOrZero = [](SDValue V) {
7877 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7878 };
7879
7880 // TODO - support non-uniform vector shift amounts.
7881 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7882 EVT ShAmtTy = N2.getValueType();
7883
7884 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7885 if (Cst->getAPIntValue().uge(BitWidth)) {
7886 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7887 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7888 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7889 }
7890
7891 unsigned ShAmt = Cst->getZExtValue();
7892 if (ShAmt == 0)
7893 return IsFSHL ? N0 : N1;
7894
7895 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7896 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7897 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7898 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7899 if (IsUndefOrZero(N0))
7900 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7901 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7902 SDLoc(N), ShAmtTy));
7903 if (IsUndefOrZero(N1))
7904 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7905 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7906 SDLoc(N), ShAmtTy));
7907 }
7908
7909 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7910 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7911 // iff We know the shift amount is in range.
7912 // TODO: when is it worth doing SUB(BW, N2) as well?
7913 if (isPowerOf2_32(BitWidth)) {
7914 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7915 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7916 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7917 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7918 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7919 }
7920
7921 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7922 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7923 // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7924 // is legal as well we might be better off avoiding non-constant (BW - N2).
7925 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7926 if (N0 == N1 && hasOperation(RotOpc, VT))
7927 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7928
7929 // Simplify, based on bits shifted out of N0/N1.
7931 return SDValue(N, 0);
7932
7933 return SDValue();
7934}
7935
7936SDValue DAGCombiner::visitABS(SDNode *N) {
7937 SDValue N0 = N->getOperand(0);
7938 EVT VT = N->getValueType(0);
7939
7940 // fold (abs c1) -> c2
7942 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7943 // fold (abs (abs x)) -> (abs x)
7944 if (N0.getOpcode() == ISD::ABS)
7945 return N0;
7946 // fold (abs x) -> x iff not-negative
7947 if (DAG.SignBitIsZero(N0))
7948 return N0;
7949 return SDValue();
7950}
7951
7952SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7953 SDValue N0 = N->getOperand(0);
7954 EVT VT = N->getValueType(0);
7955
7956 // fold (bswap c1) -> c2
7958 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7959 // fold (bswap (bswap x)) -> x
7960 if (N0.getOpcode() == ISD::BSWAP)
7961 return N0->getOperand(0);
7962 return SDValue();
7963}
7964
7965SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7966 SDValue N0 = N->getOperand(0);
7967 EVT VT = N->getValueType(0);
7968
7969 // fold (bitreverse c1) -> c2
7971 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7972 // fold (bitreverse (bitreverse x)) -> x
7973 if (N0.getOpcode() == ISD::BITREVERSE)
7974 return N0.getOperand(0);
7975 return SDValue();
7976}
7977
7978SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7979 SDValue N0 = N->getOperand(0);
7980 EVT VT = N->getValueType(0);
7981
7982 // fold (ctlz c1) -> c2
7984 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7985
7986 // If the value is known never to be zero, switch to the undef version.
7987 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7988 if (DAG.isKnownNeverZero(N0))
7989 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7990 }
7991
7992 return SDValue();
7993}
7994
7995SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7996 SDValue N0 = N->getOperand(0);
7997 EVT VT = N->getValueType(0);
7998
7999 // fold (ctlz_zero_undef c1) -> c2
8001 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8002 return SDValue();
8003}
8004
8005SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8006 SDValue N0 = N->getOperand(0);
8007 EVT VT = N->getValueType(0);
8008
8009 // fold (cttz c1) -> c2
8011 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8012
8013 // If the value is known never to be zero, switch to the undef version.
8014 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8015 if (DAG.isKnownNeverZero(N0))
8016 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8017 }
8018
8019 return SDValue();
8020}
8021
8022SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8023 SDValue N0 = N->getOperand(0);
8024 EVT VT = N->getValueType(0);
8025
8026 // fold (cttz_zero_undef c1) -> c2
8028 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8029 return SDValue();
8030}
8031
8032SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8033 SDValue N0 = N->getOperand(0);
8034 EVT VT = N->getValueType(0);
8035
8036 // fold (ctpop c1) -> c2
8038 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8039 return SDValue();
8040}
8041
8042// FIXME: This should be checking for no signed zeros on individual operands, as
8043// well as no nans.
8045 SDValue RHS,
8046 const TargetLowering &TLI) {
8047 const TargetOptions &Options = DAG.getTarget().Options;
8048 EVT VT = LHS.getValueType();
8049
8050 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8052 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8053}
8054
8055/// Generate Min/Max node
8056static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8057 SDValue RHS, SDValue True, SDValue False,
8058 ISD::CondCode CC, const TargetLowering &TLI,
8059 SelectionDAG &DAG) {
8060 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8061 return SDValue();
8062
8063 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8064 switch (CC) {
8065 case ISD::SETOLT:
8066 case ISD::SETOLE:
8067 case ISD::SETLT:
8068 case ISD::SETLE:
8069 case ISD::SETULT:
8070 case ISD::SETULE: {
8071 // Since it's known never nan to get here already, either fminnum or
8072 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8073 // expanded in terms of it.
8074 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8075 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8076 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8077
8078 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8079 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8080 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8081 return SDValue();
8082 }
8083 case ISD::SETOGT:
8084 case ISD::SETOGE:
8085 case ISD::SETGT:
8086 case ISD::SETGE:
8087 case ISD::SETUGT:
8088 case ISD::SETUGE: {
8089 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8090 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8091 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8092
8093 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8094 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8095 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8096 return SDValue();
8097 }
8098 default:
8099 return SDValue();
8100 }
8101}
8102
8103SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8104 SDValue Cond = N->getOperand(0);
8105 SDValue N1 = N->getOperand(1);
8106 SDValue N2 = N->getOperand(2);
8107 EVT VT = N->getValueType(0);
8108 EVT CondVT = Cond.getValueType();
8109 SDLoc DL(N);
8110
8111 if (!VT.isInteger())
8112 return SDValue();
8113
8114 auto *C1 = dyn_cast<ConstantSDNode>(N1);
8115 auto *C2 = dyn_cast<ConstantSDNode>(N2);
8116 if (!C1 || !C2)
8117 return SDValue();
8118
8119 // Only do this before legalization to avoid conflicting with target-specific
8120 // transforms in the other direction (create a select from a zext/sext). There
8121 // is also a target-independent combine here in DAGCombiner in the other
8122 // direction for (select Cond, -1, 0) when the condition is not i1.
8123 if (CondVT == MVT::i1 && !LegalOperations) {
8124 if (C1->isNullValue() && C2->isOne()) {
8125 // select Cond, 0, 1 --> zext (!Cond)
8126 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8127 if (VT != MVT::i1)
8128 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8129 return NotCond;
8130 }
8131 if (C1->isNullValue() && C2->isAllOnesValue()) {
8132 // select Cond, 0, -1 --> sext (!Cond)
8133 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8134 if (VT != MVT::i1)
8135 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8136 return NotCond;
8137 }
8138 if (C1->isOne() && C2->isNullValue()) {
8139 // select Cond, 1, 0 --> zext (Cond)
8140 if (VT != MVT::i1)
8141 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8142 return Cond;
8143 }
8144 if (C1->isAllOnesValue() && C2->isNullValue()) {
8145 // select Cond, -1, 0 --> sext (Cond)
8146 if (VT != MVT::i1)
8147 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8148 return Cond;
8149 }
8150
8151 // For any constants that differ by 1, we can transform the select into an
8152 // extend and add. Use a target hook because some targets may prefer to
8153 // transform in the other direction.
8154 if (TLI.convertSelectOfConstantsToMath(VT)) {
8155 if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
8156 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8157 if (VT != MVT::i1)
8158 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8159 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8160 }
8161 if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
8162 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8163 if (VT != MVT::i1)
8164 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8165 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8166 }
8167 }
8168
8169 return SDValue();
8170 }
8171
8172 // fold (select Cond, 0, 1) -> (xor Cond, 1)
8173 // We can't do this reliably if integer based booleans have different contents
8174 // to floating point based booleans. This is because we can't tell whether we
8175 // have an integer-based boolean or a floating-point-based boolean unless we
8176 // can find the SETCC that produced it and inspect its operands. This is
8177 // fairly easy if C is the SETCC node, but it can potentially be
8178 // undiscoverable (or not reasonably discoverable). For example, it could be
8179 // in another basic block or it could require searching a complicated
8180 // expression.
8181 if (CondVT.isInteger() &&
8182 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8184 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8186 C1->isNullValue() && C2->isOne()) {
8187 SDValue NotCond =
8188 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8189 if (VT.bitsEq(CondVT))
8190 return NotCond;
8191 return DAG.getZExtOrTrunc(NotCond, DL, VT);
8192 }
8193
8194 return SDValue();
8195}
8196
8197SDValue DAGCombiner::visitSELECT(SDNode *N) {
8198 SDValue N0 = N->getOperand(0);
8199 SDValue N1 = N->getOperand(1);
8200 SDValue N2 = N->getOperand(2);
8201 EVT VT = N->getValueType(0);
8202 EVT VT0 = N0.getValueType();
8203 SDLoc DL(N);
8204 SDNodeFlags Flags = N->getFlags();
8205
8206 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8207 return V;
8208
8209 // fold (select X, X, Y) -> (or X, Y)
8210 // fold (select X, 1, Y) -> (or C, Y)
8211 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8212 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8213
8214 if (SDValue V = foldSelectOfConstants(N))
8215 return V;
8216
8217 // fold (select C, 0, X) -> (and (not C), X)
8218 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8219 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8220 AddToWorklist(NOTNode.getNode());
8221 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8222 }
8223 // fold (select C, X, 1) -> (or (not C), X)
8224 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8225 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8226 AddToWorklist(NOTNode.getNode());
8227 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8228 }
8229 // fold (select X, Y, X) -> (and X, Y)
8230 // fold (select X, Y, 0) -> (and X, Y)
8231 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8232 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8233
8234 // If we can fold this based on the true/false value, do so.
8235 if (SimplifySelectOps(N, N1, N2))
8236 return SDValue(N, 0); // Don't revisit N.
8237
8238 if (VT0 == MVT::i1) {
8239 // The code in this block deals with the following 2 equivalences:
8240 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8241 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8242 // The target can specify its preferred form with the
8243 // shouldNormalizeToSelectSequence() callback. However we always transform
8244 // to the right anyway if we find the inner select exists in the DAG anyway
8245 // and we always transform to the left side if we know that we can further
8246 // optimize the combination of the conditions.
8247 bool normalizeToSequence =
8249 // select (and Cond0, Cond1), X, Y
8250 // -> select Cond0, (select Cond1, X, Y), Y
8251 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8252 SDValue Cond0 = N0->getOperand(0);
8253 SDValue Cond1 = N0->getOperand(1);
8254 SDValue InnerSelect =
8255 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8256 if (normalizeToSequence || !InnerSelect.use_empty())
8257 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8258 InnerSelect, N2, Flags);
8259 // Cleanup on failure.
8260 if (InnerSelect.use_empty())
8261 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8262 }
8263 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8264 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8265 SDValue Cond0 = N0->getOperand(0);
8266 SDValue Cond1 = N0->getOperand(1);
8267 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8268 Cond1, N1, N2, Flags);
8269 if (normalizeToSequence || !InnerSelect.use_empty())
8270 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8271 InnerSelect, Flags);
8272 // Cleanup on failure.
8273 if (InnerSelect.use_empty())
8274 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8275 }
8276
8277 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8278 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8279 SDValue N1_0 = N1->getOperand(0);
8280 SDValue N1_1 = N1->getOperand(1);
8281 SDValue N1_2 = N1->getOperand(2);
8282 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8283 // Create the actual and node if we can generate good code for it.
8284 if (!normalizeToSequence) {
8285 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8286 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8287 N2, Flags);
8288 }
8289 // Otherwise see if we can optimize the "and" to a better pattern.
8290 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8291 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8292 N2, Flags);
8293 }
8294 }
8295 }
8296 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8297 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8298 SDValue N2_0 = N2->getOperand(0);
8299 SDValue N2_1 = N2->getOperand(1);
8300 SDValue N2_2 = N2->getOperand(2);
8301 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8302 // Create the actual or node if we can generate good code for it.
8303 if (!normalizeToSequence) {
8304 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8305 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8306 N2_2, Flags);
8307 }
8308 // Otherwise see if we can optimize to a better pattern.
8309 if (SDValue Combined = visitORLike(N0, N2_0, N))
8310 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8311 N2_2, Flags);
8312 }
8313 }
8314 }
8315
8316 // select (not Cond), N1, N2 -> select Cond, N2, N1
8317 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8318 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8319 SelectOp->setFlags(Flags);
8320 return SelectOp;
8321 }
8322
8323 // Fold selects based on a setcc into other things, such as min/max/abs.
8324 if (N0.getOpcode() == ISD::SETCC) {
8325 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8326 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8327
8328 // select (fcmp lt x, y), x, y -> fminnum x, y
8329 // select (fcmp gt x, y), x, y -> fmaxnum x, y
8330 //
8331 // This is OK if we don't care what happens if either operand is a NaN.
8332 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8333 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8334 CC, TLI, DAG))
8335 return FMinMax;
8336
8337 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8338 // This is conservatively limited to pre-legal-operations to give targets
8339 // a chance to reverse the transform if they want to do that. Also, it is
8340 // unlikely that the pattern would be formed late, so it's probably not
8341 // worth going through the other checks.
8342 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8343 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8344 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8345 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8346 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8347 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8348 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8349 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8350 //
8351 // The IR equivalent of this transform would have this form:
8352 // %a = add %x, C
8353 // %c = icmp ugt %x, ~C
8354 // %r = select %c, -1, %a
8355 // =>
8356 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8357 // %u0 = extractvalue %u, 0
8358 // %u1 = extractvalue %u, 1
8359 // %r = select %u1, -1, %u0
8360 SDVTList VTs = DAG.getVTList(VT, VT0);
8361 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8362 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8363 }
8364 }
8365
8366 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8367 (!LegalOperations &&
8369 // Any flags available in a select/setcc fold will be on the setcc as they
8370 // migrated from fcmp
8371 Flags = N0.getNode()->getFlags();
8372 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8373 N2, N0.getOperand(2));
8374 SelectNode->setFlags(Flags);
8375 return SelectNode;
8376 }
8377
8378 return SimplifySelect(DL, N0, N1, N2);
8379 }
8380
8381 return SDValue();
8382}
8383
8384static
8385std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
8386 SDLoc DL(N);
8387 EVT LoVT, HiVT;
8388 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
8389
8390 // Split the inputs.
8391 SDValue Lo, Hi, LL, LH, RL, RH;
8392 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
8393 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
8394
8395 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
8396 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
8397
8398 return std::make_pair(Lo, Hi);
8399}
8400
8401// This function assumes all the vselect's arguments are CONCAT_VECTOR
8402// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8404 SDLoc DL(N);
8405 SDValue Cond = N->getOperand(0);
8406 SDValue LHS = N->getOperand(1);
8407 SDValue RHS = N->getOperand(2);
8408 EVT VT = N->getValueType(0);
8409 int NumElems = VT.getVectorNumElements();
8411 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8412 Cond.getOpcode() == ISD::BUILD_VECTOR);
8413
8414 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8415 // binary ones here.
8416 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8417 return SDValue();
8418
8419 // We're sure we have an even number of elements due to the
8420 // concat_vectors we have as arguments to vselect.
8421 // Skip BV elements until we find one that's not an UNDEF
8422 // After we find an UNDEF element, keep looping until we get to half the
8423 // length of the BV and see if all the non-undef nodes are the same.
8424 ConstantSDNode *BottomHalf = nullptr;
8425 for (int i = 0; i < NumElems / 2; ++i) {
8426 if (Cond->getOperand(i)->isUndef())
8427 continue;
8428
8429 if (BottomHalf == nullptr)
8430 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8431 else if (Cond->getOperand(i).getNode() != BottomHalf)
8432 return SDValue();
8433 }
8434
8435 // Do the same for the second half of the BuildVector
8436 ConstantSDNode *TopHalf = nullptr;
8437 for (int i = NumElems / 2; i < NumElems; ++i) {
8438 if (Cond->getOperand(i)->isUndef())
8439 continue;
8440
8441 if (TopHalf == nullptr)
8442 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8443 else if (Cond->getOperand(i).getNode() != TopHalf)
8444 return SDValue();
8445 }
8446
8447 assert(TopHalf && BottomHalf &&
8448 "One half of the selector was all UNDEFs and the other was all the "
8449 "same value. This should have been addressed before this function.");
8450 return DAG.getNode(
8451 ISD::CONCAT_VECTORS, DL, VT,
8452 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8453 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8454}
8455
8456SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8457 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8458 SDValue Mask = MSC->getMask();
8459 SDValue Data = MSC->getValue();
8460 SDValue Chain = MSC->getChain();
8461 SDLoc DL(N);
8462
8463 // Zap scatters with a zero mask.
8464 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8465 return Chain;
8466
8467 if (Level >= AfterLegalizeTypes)
8468 return SDValue();
8469
8470 // If the MSCATTER data type requires splitting and the mask is provided by a
8471 // SETCC, then split both nodes and its operands before legalization. This
8472 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8473 // and enables future optimizations (e.g. min/max pattern matching on X86).
8474 if (Mask.getOpcode() != ISD::SETCC)
8475 return SDValue();
8476
8477 // Check if any splitting is required.
8478 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
8480 return SDValue();
8481 SDValue MaskLo, MaskHi;
8482 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8483
8484 EVT LoVT, HiVT;
8485 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
8486
8487 EVT MemoryVT = MSC->getMemoryVT();
8488 unsigned Alignment = MSC->getOriginalAlignment();
8489
8490 EVT LoMemVT, HiMemVT;
8491 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8492
8493 SDValue DataLo, DataHi;
8494 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8495
8496 SDValue Scale = MSC->getScale();
8497 SDValue BasePtr = MSC->getBasePtr();
8498 SDValue IndexLo, IndexHi;
8499 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
8500
8504 Alignment, MSC->getAAInfo(), MSC->getRanges());
8505
8506 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
8508 DataLo.getValueType(), DL, OpsLo, MMO);
8509
8510 // The order of the Scatter operation after split is well defined. The "Hi"
8511 // part comes after the "Lo". So these two operations should be chained one
8512 // after another.
8513 SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
8514 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
8515 DL, OpsHi, MMO);
8516}
8517
8518SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8519 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8520 SDValue Mask = MST->getMask();
8521 SDValue Data = MST->getValue();
8522 SDValue Chain = MST->getChain();
8523 EVT VT = Data.getValueType();
8524 SDLoc DL(N);
8525
8526 // Zap masked stores with a zero mask.
8527 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8528 return Chain;
8529
8530 if (Level >= AfterLegalizeTypes)
8531 return SDValue();
8532
8533 // If the MSTORE data type requires splitting and the mask is provided by a
8534 // SETCC, then split both nodes and its operands before legalization. This
8535 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8536 // and enables future optimizations (e.g. min/max pattern matching on X86).
8537 if (Mask.getOpcode() == ISD::SETCC) {
8538 // Check if any splitting is required.
8539 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8541 return SDValue();
8542
8543 SDValue MaskLo, MaskHi, Lo, Hi;
8544 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8545
8546 SDValue Ptr = MST->getBasePtr();
8547
8548 EVT MemoryVT = MST->getMemoryVT();
8549 unsigned Alignment = MST->getOriginalAlignment();
8550
8551 EVT LoMemVT, HiMemVT;
8552 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8553
8554 SDValue DataLo, DataHi;
8555 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8556
8560 Alignment, MST->getAAInfo(), MST->getRanges());
8561
8562 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8563 MST->isTruncatingStore(),
8564 MST->isCompressingStore());
8565
8566 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8567 MST->isCompressingStore());
8568 unsigned HiOffset = LoMemVT.getStoreSize();
8569
8571 MST->getPointerInfo().getWithOffset(HiOffset),
8572 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
8573 MST->getAAInfo(), MST->getRanges());
8574
8575 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8576 MST->isTruncatingStore(),
8577 MST->isCompressingStore());
8578
8579 AddToWorklist(Lo.getNode());
8580 AddToWorklist(Hi.getNode());
8581
8582 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8583 }
8584 return SDValue();
8585}
8586
8587SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8588 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8589 SDValue Mask = MGT->getMask();
8590 SDLoc DL(N);
8591
8592 // Zap gathers with a zero mask.
8593 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8594 return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8595
8596 if (Level >= AfterLegalizeTypes)
8597 return SDValue();
8598
8599 // If the MGATHER result requires splitting and the mask is provided by a
8600 // SETCC, then split both nodes and its operands before legalization. This
8601 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8602 // and enables future optimizations (e.g. min/max pattern matching on X86).
8603
8604 if (Mask.getOpcode() != ISD::SETCC)
8605 return SDValue();
8606
8607 EVT VT = N->getValueType(0);
8608
8609 // Check if any splitting is required.
8610 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8612 return SDValue();
8613
8614 SDValue MaskLo, MaskHi, Lo, Hi;
8615 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8616
8617 SDValue PassThru = MGT->getPassThru();
8618 SDValue PassThruLo, PassThruHi;
8619 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8620
8621 EVT LoVT, HiVT;
8622 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8623
8624 SDValue Chain = MGT->getChain();
8625 EVT MemoryVT = MGT->getMemoryVT();
8626 unsigned Alignment = MGT->getOriginalAlignment();
8627
8628 EVT LoMemVT, HiMemVT;
8629 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8630
8631 SDValue Scale = MGT->getScale();
8632 SDValue BasePtr = MGT->getBasePtr();
8633 SDValue Index = MGT->getIndex();
8634 SDValue IndexLo, IndexHi;
8635 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8636
8640 Alignment, MGT->getAAInfo(), MGT->getRanges());
8641
8642 SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8643 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8644 MMO);
8645
8646 SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8647 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8648 MMO);
8649
8650 AddToWorklist(Lo.getNode());
8651 AddToWorklist(Hi.getNode());
8652
8653 // Build a factor node to remember that this load is independent of the
8654 // other one.
8655 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8656 Hi.getValue(1));
8657
8658 // Legalized the chain result - switch anything that used the old chain to
8659 // use the new one.
8660 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8661
8662 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8663
8664 SDValue RetOps[] = { GatherRes, Chain };
8665 return DAG.getMergeValues(RetOps, DL);
8666}
8667
8668SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8669 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8670 SDValue Mask = MLD->getMask();
8671 SDLoc DL(N);
8672
8673 // Zap masked loads with a zero mask.
8674 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8675 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8676
8677 if (Level >= AfterLegalizeTypes)
8678 return SDValue();
8679
8680 // If the MLOAD result requires splitting and the mask is provided by a
8681 // SETCC, then split both nodes and its operands before legalization. This
8682 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8683 // and enables future optimizations (e.g. min/max pattern matching on X86).
8684 if (Mask.getOpcode() == ISD::SETCC) {
8685 EVT VT = N->getValueType(0);
8686
8687 // Check if any splitting is required.
8688 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8690 return SDValue();
8691
8692 SDValue MaskLo, MaskHi, Lo, Hi;
8693 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8694
8695 SDValue PassThru = MLD->getPassThru();
8696 SDValue PassThruLo, PassThruHi;
8697 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8698
8699 EVT LoVT, HiVT;
8700 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8701
8702 SDValue Chain = MLD->getChain();
8703 SDValue Ptr = MLD->getBasePtr();
8704 EVT MemoryVT = MLD->getMemoryVT();
8705 unsigned Alignment = MLD->getOriginalAlignment();
8706
8707 EVT LoMemVT, HiMemVT;
8708 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8709
8713 Alignment, MLD->getAAInfo(), MLD->getRanges());
8714
8715 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8716 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8717
8718 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8719 MLD->isExpandingLoad());
8720 unsigned HiOffset = LoMemVT.getStoreSize();
8721
8723 MLD->getPointerInfo().getWithOffset(HiOffset),
8724 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
8725 MLD->getAAInfo(), MLD->getRanges());
8726
8727 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8728 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8729
8730 AddToWorklist(Lo.getNode());
8731 AddToWorklist(Hi.getNode());
8732
8733 // Build a factor node to remember that this load is independent of the
8734 // other one.
8735 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8736 Hi.getValue(1));
8737
8738 // Legalized the chain result - switch anything that used the old chain to
8739 // use the new one.
8740 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8741
8742 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8743
8744 SDValue RetOps[] = { LoadRes, Chain };
8745 return DAG.getMergeValues(RetOps, DL);
8746 }
8747 return SDValue();
8748}
8749
8750/// A vector select of 2 constant vectors can be simplified to math/logic to
8751/// avoid a variable select instruction and possibly avoid constant loads.
/// The operands of \p N are read as (Cond, N1, N2). Returns an ADD node when,
/// across all non-undef lanes, N1 is uniformly N2 + 1 or uniformly N2 - 1;
/// otherwise returns an empty SDValue.
8752SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8753 SDValue Cond = N->getOperand(0);
8754 SDValue N1 = N->getOperand(1);
8755 SDValue N2 = N->getOperand(2);
8756 EVT VT = N->getValueType(0);
8757 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
// NOTE(review): the embedded numbering jumps from 8757 to 8761, so the rest of
// this bail-out condition is missing from this copy. The loop below indexes
// the operands of N1/N2 and casts them to ConstantSDNode, so the missing part
// presumably establishes that both are vectors of constants - confirm against
// the upstream file.
8761 return SDValue();
8762
8763 // Check if we can use the condition value to increment/decrement a single
8764 // constant value. This simplifies a select to an add and removes a constant
8765 // load/materialization from the general case.
8766 bool AllAddOne = true;
8767 bool AllSubOne = true;
8768 unsigned Elts = VT.getVectorNumElements();
8769 for (unsigned i = 0; i != Elts; ++i) {
8770 SDValue N1Elt = N1.getOperand(i);
8771 SDValue N2Elt = N2.getOperand(i);
// A lane that is undef on either side is skipped, so it never disqualifies
// either pattern.
8772 if (N1Elt.isUndef() || N2Elt.isUndef())
8773 continue;
8774
8775 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8776 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
// Both flags are tracked independently; each is cleared by the first lane
// that breaks its pattern.
8777 if (C1 != C2 + 1)
8778 AllAddOne = false;
8779 if (C1 != C2 - 1)
8780 AllSubOne = false;
8781 }
8782
8783 // Further simplifications for the extra-special cases where the constants are
8784 // all 0 or all -1 should be implemented as folds of these patterns.
8785 SDLoc DL(N);
8786 if (AllAddOne || AllSubOne) {
8787 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8788 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
// When both flags are still set (possible only if every lane was skipped),
// the zero-extend form is chosen.
8789 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8790 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8791 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8792 }
8793
8794 // The general case for select-of-constants:
8795 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8796 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8797 // leave that to a machine-specific pass.
8798 return SDValue();
8799}
8800
/// Combine a VSELECT node whose operands are (N0 = condition, N1, N2).
/// The folds are tried in the order they appear below; the first one that
/// applies supplies the return value. An empty SDValue is returned when none
/// applies.
8801SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8802 SDValue N0 = N->getOperand(0);
8803 SDValue N1 = N->getOperand(1);
8804 SDValue N2 = N->getOperand(2);
8805 EVT VT = N->getValueType(0);
8806 SDLoc DL(N);
8807
8808 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8809 return V;
8810
8811 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8812 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8813 return DAG.getSelect(DL, VT, F, N2, N1);
8814
8815 // Canonicalize integer abs.
8816 // vselect (setg[te] X, 0), X, -X ->
8817 // vselect (setgt X, -1), X, -X ->
8818 // vselect (setl[te] X, 0), -X, X ->
8819 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8820 if (N0.getOpcode() == ISD::SETCC) {
8821 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8822 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8823 bool isAbs = false;
8824 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8825
8826 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8827 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8828 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
// NOTE(review): embedded line 8829 (the statement controlled by the 'if'
// above) is missing from this copy; presumably it sets isAbs - confirm.
8830 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8831 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
// NOTE(review): embedded line 8832 (the statement controlled by the 'else if'
// above) is missing from this copy as well.
8833
8834 if (isAbs) {
// This VT is the compare operand type and shadows the function-level VT for
// the rest of this scope.
8835 EVT VT = LHS.getValueType();
// NOTE(review): embedded line 8836 is missing; it presumably guards the ABS
// return below with a legality check, since the sra/add/xor expansion that
// follows would otherwise be unreachable - confirm.
8837 return DAG.getNode(ISD::ABS, DL, VT, LHS);
8838
8839 SDValue Shift = DAG.getNode(
8840 ISD::SRA, DL, VT, LHS,
8841 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8842 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8843 AddToWorklist(Shift.getNode());
8844 AddToWorklist(Add.getNode());
8845 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8846 }
8847
8848 // vselect x, y (fcmp lt x, y) -> fminnum x, y
8849 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8850 //
8851 // This is OK if we don't care about what happens if either operand is a
8852 // NaN.
8853 //
8854 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8855 N0.getOperand(1), TLI)) {
8856 if (SDValue FMinMax = combineMinNumMaxNum(
8857 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8858 return FMinMax;
8859 }
8860
8861 // If this select has a condition (setcc) with narrower operands than the
8862 // select, try to widen the compare to match the select width.
8863 // TODO: This should be extended to handle any constant.
8864 // TODO: This could be extended to handle non-loading patterns, but that
8865 // requires thorough testing to avoid regressions.
8866 if (isNullOrNullSplat(RHS)) {
8867 EVT NarrowVT = LHS.getValueType();
// NOTE(review): embedded line 8868 is missing; WideVT, used below, is
// presumably declared there.
8869 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8870 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8871 unsigned WideWidth = WideVT.getScalarSizeInBits();
// The signedness of the compare selects both the extending-load kind that is
// checked for legality and the extension opcode used on the operands.
8872 bool IsSigned = isSignedIntSetCC(CC);
8873 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8874 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8875 SetCCWidth != 1 && SetCCWidth < WideWidth &&
8876 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8877 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8878 // Both compare operands can be widened for free. The LHS can use an
8879 // extended load, and the RHS is a constant:
8880 // vselect (ext (setcc load(X), C)), N1, N2 -->
8881 // vselect (setcc extload(X), C'), N1, N2
8882 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8883 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8884 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8885 EVT WideSetCCVT = getSetCCResultType(WideVT);
8886 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8887 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8888 }
8889 }
8890 }
8891
8892 if (SimplifySelectOps(N, N1, N2))
8893 return SDValue(N, 0); // Don't revisit N.
8894
8895 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
// NOTE(review): embedded line 8896 (the condition guarding this return) is
// missing from this copy.
8897 return N1;
8898 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
// NOTE(review): embedded line 8899 (the condition guarding this return) is
// missing from this copy.
8900 return N2;
8901
8902 // The ConvertSelectToConcatVector function is assuming both the above
8903 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8904 // and addressed.
8905 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
// NOTE(review): embedded lines 8906-8907 (the rest of this condition) are
// missing from this copy.
8908 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8909 return CV;
8910 }
8911
8912 if (SDValue V = foldVSelectOfConstants(N))
8913 return V;
8914
8915 return SDValue();
8916}
8917
8918SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8919 SDValue N0 = N->getOperand(0);
8920 SDValue N1 = N->getOperand(1);
8921 SDValue N2 = N->getOperand(2);
8922 SDValue N3 = N->getOperand(3);
8923 SDValue N4 = N->getOperand(4);
8924 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8925
8926 // fold select_cc lhs, rhs, x, x, cc -> x
8927 if (N2 == N3)
8928 return N2;
8929
8930 // Determine if the condition we're dealing with is constant
8931 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8932 CC, SDLoc(N), false)) {
8933 AddToWorklist(SCC.getNode());
8934
8935 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8936 if (!SCCC->isNullValue())
8937 return N2; // cond always true -> true val
8938 else
8939 return N3; // cond always false -> false val
8940 } else if (SCC->isUndef()) {
8941 // When the condition is UNDEF, just return the first operand. This is
8942 // coherent the DAG creation, no setcc node is created in this case
8943 return N2;
8944 } else if (SCC.getOpcode() == ISD::SETCC) {
8945 // Fold to a simpler select_cc
8946 SDValue SelectOp = DAG.getNode(
8947 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8948 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8949 SelectOp->setFlags(SCC->getFlags());
8950 return SelectOp;
8951 }
8952 }
8953
8954 // If we can fold this based on the true/false value, do so.
8955 if (SimplifySelectOps(N, N2, N3))
8956 return SDValue(N, 0); // Don't revisit N.
8957
8958 // fold select_cc into other things, such as min/max/abs
8959 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8960}
8961
8962SDValue DAGCombiner::visitSETCC(SDNode *N) {
8963 // setcc is very commonly used as an argument to brcond. This pattern
8964 // also lend itself to numerous combines and, as a result, it is desired
8965 // we keep the argument to a brcond as a setcc as much as possible.
8966 bool PreferSetCC =
8967 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8968
8969 SDValue Combined = SimplifySetCC(
8970 N->getValueType(0), N->getOperand(0), N->getOperand(1),
8971 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8972
8973 if (!Combined)
8974 return SDValue();
8975
8976 // If we prefer to have a setcc, and we don't, we'll try our best to
8977 // recreate one using rebuildSetCC.
8978 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8979 SDValue NewSetCC = rebuildSetCC(Combined);
8980
8981 // We don't have anything interesting to combine to.
8982 if (NewSetCC.getNode() == N)
8983 return SDValue();
8984
8985 if (NewSetCC)
8986 return NewSetCC;
8987 }
8988
8989 return Combined;
8990}
8991
8992SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8993 SDValue LHS = N->getOperand(0);
8994 SDValue RHS = N->getOperand(1);
8995 SDValue Carry = N->getOperand(2);
8996 SDValue Cond = N->getOperand(3);
8997
8998 // If Carry is false, fold to a regular SETCC.
8999 if (isNullConstant(Carry))
9000 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9001
9002 return SDValue();
9003}
9004
9005/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9006/// a build_vector of constants.
9007/// This function is called by the DAGCombiner when visiting sext/zext/aext
9008/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9009/// Vector extends are not folded if operations are legal; this is to
9010/// avoid introducing illegal build_vector dag nodes.
/// Returns the folded node, or an empty SDValue when the vector path does not
/// apply.
// NOTE(review): the embedded numbering skips 9011, so the first line of the
// signature (return type, function name, leading parameters) is missing from
// this copy. The body reads N and TLI, which are presumably declared there.
9012 SelectionDAG &DAG, bool LegalTypes) {
9013 unsigned Opcode = N->getOpcode();
9014 SDValue N0 = N->getOperand(0);
9015 EVT VT = N->getValueType(0);
9016 SDLoc DL(N);
9017
9018 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9019 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
// NOTE(review): embedded line 9020 (the end of the opcode list) is missing
// from this copy.
9021 && "Expected EXTEND dag node in input!");
9022
9023 // fold (sext c1) -> c1
9024 // fold (zext c1) -> c1
9025 // fold (aext c1) -> c1
9026 if (isa<ConstantSDNode>(N0))
9027 return DAG.getNode(Opcode, DL, VT, N0);
9028
9029 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9030 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9031 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9032 if (N0->getOpcode() == ISD::SELECT) {
9033 SDValue Op1 = N0->getOperand(1);
9034 SDValue Op2 = N0->getOperand(2);
// A zero_extend is left alone when the target reports the zext as free.
9035 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9036 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9037 // For any_extend, choose sign extension of the constants to allow a
9038 // possible further transform to sign_extend_inreg.i.e.
9039 //
9040 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9041 // t2: i64 = any_extend t1
9042 // -->
9043 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9044 // -->
9045 // t4: i64 = sign_extend_inreg t3
9046 unsigned FoldOpc = Opcode;
9047 if (FoldOpc == ISD::ANY_EXTEND)
9048 FoldOpc = ISD::SIGN_EXTEND;
9049 return DAG.getSelect(DL, VT, N0->getOperand(0),
9050 DAG.getNode(FoldOpc, DL, VT, Op1),
9051 DAG.getNode(FoldOpc, DL, VT, Op2));
9052 }
9053 }
9054
9055 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9056 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9057 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9058 EVT SVT = VT.getScalarType();
9059 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
// NOTE(review): embedded line 9060 (the rest of this condition) is missing
// from this copy; the loop below casts every non-undef operand of N0 to
// ConstantSDNode, so it presumably checks for a build_vector of constants.
9061 return SDValue();
9062
9063 // We can fold this node into a build_vector.
9064 unsigned VTBits = SVT.getSizeInBits();
9065 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
// NOTE(review): embedded line 9066 is missing; Elts, filled below, is
// presumably declared there.
9067 unsigned NumElts = VT.getVectorNumElements();
9068
9069 // For zero-extensions, UNDEF elements still guarantee to have the upper
9070 // bits set to zero.
9071 bool IsZext =
9072 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9073
9074 for (unsigned i = 0; i != NumElts; ++i) {
9075 SDValue Op = N0.getOperand(i);
9076 if (Op.isUndef()) {
9077 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9078 continue;
9079 }
9080
// This per-element DL shadows the function-level DL for the rest of the
// iteration, so each constant below is created with the element's location.
9081 SDLoc DL(Op);
9082 // Get the constant value and if needed trunc it to the size of the type.
9083 // Nodes like build_vector might have constants wider than the scalar type.
9084 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
// Everything that is not a sign-extending opcode (zext and aext forms) is
// zero-extended here.
9085 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9086 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9087 else
9088 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9089 }
9090
9091 return DAG.getBuildVector(VT, DL, Elts);
9092}
9093
9094// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
9095// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9096// transformation. Returns true if the extensions are possible and the
9097// above-mentioned transformation is profitable.
// SETCC users of N0 that compare it against a constant are appended to
// ExtendNodes.
// NOTE(review): the embedded numbering skips 9098, so the first line of the
// signature (return type, name, leading parameters) is missing from this
// copy. The body reads VT, N and N0, which are presumably declared there.
9099 unsigned ExtOpc,
9100 SmallVectorImpl<SDNode *> &ExtendNodes,
9101 const TargetLowering &TLI) {
9102 bool HasCopyToRegUses = false;
9103 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9104 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9105 UE = N0.getNode()->use_end();
9106 UI != UE; ++UI) {
9107 SDNode *User = *UI;
// Skip the extend node itself.
9108 if (User == N)
9109 continue;
// Skip uses of a different result of the same node than the one N0 names.
9110 if (UI.getUse().getResNo() != N0.getResNo())
9111 continue;
9112 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9113 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9114 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9115 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9116 // Sign bits will be lost after a zext.
9117 return false;
// Add becomes true only when an operand other than N0 is present; any such
// operand must be a constant or the whole transformation is rejected.
9118 bool Add = false;
9119 for (unsigned i = 0; i != 2; ++i) {
9120 SDValue UseOp = User->getOperand(i);
9121 if (UseOp == N0)
9122 continue;
9123 if (!isa<ConstantSDNode>(UseOp))
9124 return false;
9125 Add = true;
9126 }
9127 if (Add)
9128 ExtendNodes.push_back(User);
9129 continue;
9130 }
9131 // If truncates aren't free and there are users we can't
9132 // extend, it isn't worthwhile.
9133 if (!isTruncFree)
9134 return false;
9135 // Remember if this value is live-out.
9136 if (User->getOpcode() == ISD::CopyToReg)
9137 HasCopyToRegUses = true;
9138 }
9139
9140 if (HasCopyToRegUses) {
// Check whether result 0 of N is also consumed by a CopyToReg.
9141 bool BothLiveOut = false;
9142 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9143 UI != UE; ++UI) {
9144 SDUse &Use = UI.getUse();
9145 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9146 BothLiveOut = true;
9147 break;
9148 }
9149 }
9150 if (BothLiveOut)
9151 // Both unextended and extended values are live out. There had better be
9152 // a good reason for the transformation.
// The size is returned directly, so the result is non-zero only when at
// least one SETCC user was collected.
9153 return ExtendNodes.size();
9154 }
9155 return true;
9156}
9157
/// Rewrite each SETCC in \p SetCCs to compare at the type of \p ExtLoad:
/// an operand equal to \p OrigLoad is replaced by \p ExtLoad, any other
/// compare operand is wrapped in an \p ExtType node, and the condition-code
/// operand is kept. The old SETCC is replaced through CombineTo.
9158void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9159 SDValue OrigLoad, SDValue ExtLoad,
9160 ISD::NodeType ExtType) {
9161 // Extend SetCC uses if necessary.
9162 SDLoc DL(ExtLoad);
9163 for (SDNode *SetCC : SetCCs) {
// NOTE(review): embedded line 9164 is missing from this copy; Ops, filled
// below, is presumably declared there (once per SETCC).
9165
// Operands 0 and 1 are the two compared values.
9166 for (unsigned j = 0; j != 2; ++j) {
9167 SDValue SOp = SetCC->getOperand(j);
9168 if (SOp == OrigLoad)
9169 Ops.push_back(ExtLoad);
9170 else
9171 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9172 }
9173
// Operand 2 (the condition code) is carried over unchanged.
9174 Ops.push_back(SetCC->getOperand(2));
9175 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9176 }
9177}
9178
9179// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Split a sign/zero extend of a plain vector load into several narrower
/// extending loads whose results are concatenated. Returns N (already
/// replaced through CombineTo) on success, or an empty SDValue.
9180SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9181 SDValue N0 = N->getOperand(0);
9182 EVT DstVT = N->getValueType(0);
9183 EVT SrcVT = N0.getValueType();
9184
9185 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9186 N->getOpcode() == ISD::ZERO_EXTEND) &&
9187 "Unexpected node type (not an extend)!");
9188
9189 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9190 // For example, on a target with legal v4i32, but illegal v8i32, turn:
9191 // (v8i32 (sext (v8i16 (load x))))
9192 // into:
9193 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9194 // (v4i32 (sextload (x + 16)))))
9195 // Where uses of the original load, i.e.:
9196 // (v8i16 (load x))
9197 // are replaced with:
9198 // (v8i16 (truncate
9199 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9200 // (v4i32 (sextload (x + 16)))))))
9201 //
9202 // This combine is only applicable to illegal, but splittable, vectors.
9203 // All legal types, and illegal non-vector types, are handled elsewhere.
9204 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9205 //
9206 if (N0->getOpcode() != ISD::LOAD)
9207 return SDValue();
9208
9209 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9210
// Require a non-extending, unindexed, non-volatile load with a single use,
// a power-of-two vector destination type, and target approval.
9211 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9212 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
9213 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9214 return SDValue();
9215
// NOTE(review): embedded line 9216 is missing from this copy; SetCCs, used
// below, is presumably declared there.
9217 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9218 return SDValue();
9219
9220 ISD::LoadExtType ExtType =
9221 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9222
9223 // Try to split the vector types to get down to legal types.
9224 EVT SplitSrcVT = SrcVT;
9225 EVT SplitDstVT = DstVT;
9226 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9227 SplitSrcVT.getVectorNumElements() > 1) {
9228 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9229 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9230 }
9231
// The loop can also stop at a single-element vector without finding a legal
// extending load; give up in that case.
9232 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9233 return SDValue();
9234
9235 SDLoc DL(N);
9236 const unsigned NumSplits =
9237 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9238 const unsigned Stride = SplitSrcVT.getStoreSize();
// NOTE(review): embedded lines 9239-9240 are missing from this copy; Loads
// and Chains, filled below, are presumably declared there.
9241
9242 SDValue BasePtr = LN0->getBasePtr();
9243 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9244 const unsigned Offset = Idx * Stride;
9245 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9246
// Every piece uses the original load's chain, so the pieces are not ordered
// against each other.
9247 SDValue SplitLoad = DAG.getExtLoad(
9248 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9249 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9250 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9251
// Advance the address by one piece for the next iteration.
9252 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9253 DAG.getConstant(Stride, DL, BasePtr.getValueType()));
9254
9255 Loads.push_back(SplitLoad.getValue(0));
9256 Chains.push_back(SplitLoad.getValue(1));
9257 }
9258
9259 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9260 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9261
9262 // Simplify TF.
9263 AddToWorklist(NewChain.getNode());
9264
9265 CombineTo(N, NewValue);
9266
9267 // Replace uses of the original load (before extension)
9268 // with a truncate of the concatenated sextloaded vectors.
9269 SDValue Trunc =
9270 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9271 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9272 CombineTo(N0.getNode(), Trunc, NewChain);
9273 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9274}
9275
9276// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9277// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
// Returns N (already replaced through CombineTo) on success, or an empty
// SDValue when the pattern does not match or is not worthwhile.
9278SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9279 assert(N->getOpcode() == ISD::ZERO_EXTEND);
9280 EVT VT = N->getValueType(0);
9281 EVT OrigVT = N->getOperand(0).getValueType();
// Nothing to do when the target reports this zero extension as free.
9282 if (TLI.isZExtFree(OrigVT, VT))
9283 return SDValue();
9284
9285 // and/or/xor
9286 SDValue N0 = N->getOperand(0);
9287 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9288 N0.getOpcode() == ISD::XOR) ||
9289 N0.getOperand(1).getOpcode() != ISD::Constant ||
9290 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9291 return SDValue();
9292
9293 // shl/shr
9294 SDValue N1 = N0->getOperand(0);
9295 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9296 N1.getOperand(1).getOpcode() != ISD::Constant ||
9297 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9298 return SDValue();
9299
9300 // load
9301 if (!isa<LoadSDNode>(N1.getOperand(0)))
9302 return SDValue();
9303 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9304 EVT MemVT = Load->getMemoryVT();
// The zero-extending load must be legal, and the existing load must be
// neither sign-extending nor indexed.
9305 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9306 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9307 return SDValue();
9308
9309
9310 // If the shift op is SHL, the logic op must be AND, otherwise the result
9311 // will be wrong.
9312 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9313 return SDValue();
9314
// Both the logic op and the shift must have a single use.
9315 if (!N0.hasOneUse() || !N1.hasOneUse())
9316 return SDValue();
9317
// NOTE(review): embedded line 9318 is missing from this copy; SetCCs, used
// below, is presumably declared there.
9319 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9320 ISD::ZERO_EXTEND, SetCCs, TLI))
9321 return SDValue();
9322
9323 // Actually do the transformation.
9324 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9325 Load->getChain(), Load->getBasePtr(),
9326 Load->getMemoryVT(), Load->getMemOperand());
9327
// Rebuild the shift at the wide type, reusing the original shift amount.
9328 SDLoc DL1(N1);
9329 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9330 N1.getOperand(1));
9331
// Zero-extend the logic-op constant to the wide type. The local is named
// 'And' but carries whichever of and/or/xor N0 was.
9332 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9333 Mask = Mask.zext(VT.getSizeInBits());
9334 SDLoc DL0(N0);
9335 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9336 DAG.getConstant(Mask, DL0, VT));
9337
9338 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9339 CombineTo(N, And);
// If the loaded value has a single use, only the chain result is redirected;
// otherwise the remaining users get a truncate of the extended load.
9340 if (SDValue(Load, 0).hasOneUse()) {
9341 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9342 } else {
9343 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9344 Load->getValueType(0), ExtLoad);
9345 CombineTo(Load, Trunc, ExtLoad.getValue(1));
9346 }
9347
9348 // N0 is dead at this point.
9349 recursivelyDeleteUnusedNodes(N0.getNode());
9350
9351 return SDValue(N,0); // Return N so it doesn't get rechecked!
9352}
9353
9354/// If we're narrowing or widening the result of a vector select and the final
9355/// size is the same size as a setcc (compare) feeding the select, then try to
9356/// apply the cast operation to the select's operands because matching vector
9357/// sizes for a select condition and other operands should be more efficient.
9358SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9359 unsigned CastOpcode = Cast->getOpcode();
9360 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9361 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9362 CastOpcode == ISD::FP_ROUND) &&
9363 "Unexpected opcode for vector select narrowing/widening");
9364
9365 // We only do this transform before legal ops because the pattern may be
9366 // obfuscated by target-specific operations after legalization. Do not create
9367 // an illegal select op, however, because that may be difficult to lower.
9368 EVT VT = Cast->getValueType(0);
9369 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9370 return SDValue();
9371
9372 SDValue VSel = Cast->getOperand(0);
9373 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9374 VSel.getOperand(0).getOpcode() != ISD::SETCC)
9375 return SDValue();
9376
9377 // Does the setcc have the same vector size as the casted select?
9378 SDValue SetCC = VSel.getOperand(0);
9379 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9380 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9381 return SDValue();
9382
9383 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9384 SDValue A = VSel.getOperand(1);
9385 SDValue B = VSel.getOperand(2);
9386 SDValue CastA, CastB;
9387 SDLoc DL(Cast);
9388 if (CastOpcode == ISD::FP_ROUND) {
9389 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9390 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9391 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9392 } else {
9393 CastA = DAG.getNode(CastOpcode, DL, VT, A);
9394 CastB = DAG.getNode(CastOpcode, DL, VT, B);
9395 }
9396 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9397}
9398
9399// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9400// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Returns N (already replaced through Combiner.CombineTo) on success, or an
// empty SDValue.
// NOTE(review): the embedded numbering skips 9401, so the first line of the
// signature (return type, name, leading parameters) is missing from this
// copy. The body reads DAG and Combiner, which are presumably declared there.
9402 const TargetLowering &TLI, EVT VT,
9403 bool LegalOperations, SDNode *N,
9404 SDValue N0, ISD::LoadExtType ExtLoadType) {
9405 SDNode *N0Node = N0.getNode();
// Only the sext-load and zext-load predicates are consulted here, chosen by
// whether the requested kind is SEXTLOAD.
9406 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9407 : ISD::isZEXTLoad(N0Node);
// Accept a load of the matching extension kind or one satisfying
// ISD::isEXTLoad; it must also be unindexed and have a single use.
9408 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9409 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9410 return SDValue();
9411
9412 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9413 EVT MemVT = LN0->getMemoryVT();
// After operation legalization, for volatile loads, and for vectors, the
// wider extending load must be legal on the target.
9414 if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
9415 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9416 return SDValue();
9417
9418 SDValue ExtLoad =
9419 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9420 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9421 Combiner.CombineTo(N, ExtLoad);
// Redirect users of the old load's chain result to the new load's chain.
9422 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9423 if (LN0->use_empty())
9424 Combiner.recursivelyDeleteUnusedNodes(LN0);
9425 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9426}
9427
9428// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9429// Only generate vector extloads when 1) they're legal, and 2) they are
9430// deemed desirable by the target.
// Returns N (already replaced through Combiner.CombineTo) on success, or an
// empty value.
// NOTE(review): the embedded numbering skips 9431, so the first line of the
// signature (return type, name, leading parameters) is missing from this
// copy. The body reads DAG and Combiner, which are presumably declared there.
9432 const TargetLowering &TLI, EVT VT,
9433 bool LegalOperations, SDNode *N, SDValue N0,
9434 ISD::LoadExtType ExtLoadType,
9435 ISD::NodeType ExtOpc) {
9436 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
// NOTE(review): embedded line 9437 (one more term of this condition) is
// missing from this copy.
9438 ((LegalOperations || VT.isVector() ||
9439 cast<LoadSDNode>(N0)->isVolatile()) &&
9440 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9441 return {};
9442
9443 bool DoXform = true;
// NOTE(review): embedded line 9444 is missing from this copy; SetCCs, used
// below, is presumably declared there.
// With more than one use of the load, the other users must be extendable too.
9445 if (!N0.hasOneUse())
9446 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9447 if (VT.isVector())
9448 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9449 if (!DoXform)
9450 return {};
9451
9452 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9453 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9454 LN0->getBasePtr(), N0.getValueType(),
9455 LN0->getMemOperand());
9456 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9457 // If the load value is used only by N, replace it via CombineTo N.
// The use count is sampled before N is replaced below.
9458 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9459 Combiner.CombineTo(N, ExtLoad);
9460 if (NoReplaceTrunc) {
9461 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9462 Combiner.recursivelyDeleteUnusedNodes(LN0);
9463 } else {
// Other users of the narrow value get a truncate of the extended load.
9464 SDValue Trunc =
9465 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9466 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9467 }
9468 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9469}
9470
// Fold a sext/zext of a single-use i1 "setgt X, -1" into a shift of (not X)
// when the extended type equals the type of X. Returns an empty SDValue
// otherwise.
// NOTE(review): the embedded numbering skips 9471, so the first line of the
// signature (return type, name, leading parameters) is missing from this
// copy. The body reads N and DAG, which are presumably declared there.
9472 bool LegalOperations) {
9473 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9474 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9475
9476 SDValue SetCC = N->getOperand(0);
// Only done before operation legalization, on a single-use setcc that
// produces i1.
9477 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9478 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9479 return SDValue();
9480
9481 SDValue X = SetCC.getOperand(0);
9482 SDValue Ones = SetCC.getOperand(1);
9483 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9484 EVT VT = N->getValueType(0);
9485 EVT XVT = X.getValueType();
9486 // setge X, C is canonicalized to setgt, so we do not need to match that
9487 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9488 // not require the 'not' op.
9489 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9490 // Invert and smear/shift the sign bit:
9491 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9492 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9493 SDLoc DL(N);
9494 SDValue NotX = DAG.getNOT(DL, X, VT);
9495 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9496 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9497 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9498 }
9499 return SDValue();
9500}
9501
9502SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9503 SDValue N0 = N->getOperand(0);
9504 EVT VT = N->getValueType(0);
9505 SDLoc DL(N);
9506
9507 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9508 return Res;
9509
9510 // fold (sext (sext x)) -> (sext x)
9511 // fold (sext (aext x)) -> (sext x)
9512 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9513 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9514
9515 if (N0.getOpcode() == ISD::TRUNCATE) {
9516 // fold (sext (truncate (load x))) -> (sext (smaller load x))
9517 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9518 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9519 SDNode *oye = N0.getOperand(0).getNode();
9520 if (NarrowLoad.getNode() != N0.getNode()) {
9521 CombineTo(N0.getNode(), NarrowLoad);
9522 // CombineTo deleted the truncate, if needed, but not what's under it.
9523 AddToWorklist(oye);
9524 }
9525 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9526 }
9527
9528 // See if the value being truncated is already sign extended. If so, just
9529 // eliminate the trunc/sext pair.
9530 SDValue Op = N0.getOperand(0);
9531 unsigned OpBits = Op.getScalarValueSizeInBits();
9532 unsigned MidBits = N0.getScalarValueSizeInBits();
9533 unsigned DestBits = VT.getScalarSizeInBits();
9534 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9535
9536 if (OpBits == DestBits) {
9537 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
9538 // bits, it is already ready.
9539 if (NumSignBits > DestBits-MidBits)
9540 return Op;
9541 } else if (OpBits < DestBits) {
9542 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
9543 // bits, just sext from i32.
9544 if (NumSignBits > OpBits-MidBits)
9545 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9546 } else {
9547 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
9548 // bits, just truncate to i32.
9549 if (NumSignBits > OpBits-MidBits)
9550 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9551 }
9552
9553 // fold (sext (truncate x)) -> (sextinreg x).
9554 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9555 N0.getValueType())) {
9556 if (OpBits < DestBits)
9557 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9558 else if (OpBits > DestBits)
9559 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9560 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9561 DAG.getValueType(N0.getValueType()));
9562 }
9563 }
9564
9565 // Try to simplify (sext (load x)).
9566 if (SDValue foldedExt =
9567 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9569 return foldedExt;
9570
9571 // fold (sext (load x)) to multiple smaller sextloads.
9572 // Only on illegal but splittable vectors.
9573 if (SDValue ExtLoad = CombineExtLoad(N))
9574 return ExtLoad;
9575
9576 // Try to simplify (sext (sextload x)).
9577 if (SDValue foldedExt = tryToFoldExtOfExtload(
9578 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9579 return foldedExt;
9580
9581 // fold (sext (and/or/xor (load x), cst)) ->
9582 // (and/or/xor (sextload x), (sext cst))
9583 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9584 N0.getOpcode() == ISD::XOR) &&
9585 isa<LoadSDNode>(N0.getOperand(0)) &&
9586 N0.getOperand(1).getOpcode() == ISD::Constant &&
9587 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9588 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9589 EVT MemVT = LN00->getMemoryVT();
9590 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9591 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9593 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9594 ISD::SIGN_EXTEND, SetCCs, TLI);
9595 if (DoXform) {
9596 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9597 LN00->getChain(), LN00->getBasePtr(),
9598 LN00->getMemoryVT(),
9599 LN00->getMemOperand());
9600 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9601 Mask = Mask.sext(VT.getSizeInBits());
9602 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9603 ExtLoad, DAG.getConstant(Mask, DL, VT));
9604 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9605 bool NoReplaceTruncAnd = !N0.hasOneUse();
9606 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9607 CombineTo(N, And);
9608 // If N0 has multiple uses, change other uses as well.
9609 if (NoReplaceTruncAnd) {
9610 SDValue TruncAnd =
9611 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9612 CombineTo(N0.getNode(), TruncAnd);
9613 }
9614 if (NoReplaceTrunc) {
9615 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9616 } else {
9617 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9618 LN00->getValueType(0), ExtLoad);
9619 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9620 }
9621 return SDValue(N,0); // Return N so it doesn't get rechecked!
9622 }
9623 }
9624 }
9625
9626 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9627 return V;
9628
9629 if (N0.getOpcode() == ISD::SETCC) {
9630 SDValue N00 = N0.getOperand(0);
9631 SDValue N01 = N0.getOperand(1);
9632 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9633 EVT N00VT = N0.getOperand(0).getValueType();
9634
9635 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9636 // Only do this before legalize for now.
9637 if (VT.isVector() && !LegalOperations &&
9638 TLI.getBooleanContents(N00VT) ==
9640 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9641 // of the same size as the compared operands. Only optimize sext(setcc())
9642 // if this is the case.
9643 EVT SVT = getSetCCResultType(N00VT);
9644
9645 // If we already have the desired type, don't change it.
9646 if (SVT != N0.getValueType()) {
9647 // We know that the # elements of the results is the same as the
9648 // # elements of the compare (and the # elements of the compare result
9649 // for that matter). Check to see that they are the same size. If so,
9650 // we know that the element size of the sext'd result matches the
9651 // element size of the compare operands.
9652 if (VT.getSizeInBits() == SVT.getSizeInBits())
9653 return DAG.getSetCC(DL, VT, N00, N01, CC);
9654
9655 // If the desired elements are smaller or larger than the source
9656 // elements, we can use a matching integer vector type and then
9657 // truncate/sign extend.
9658 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9659 if (SVT == MatchingVecType) {
9660 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9661 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9662 }
9663 }
9664 }
9665
9666 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9667 // Here, T can be 1 or -1, depending on the type of the setcc and
9668 // getBooleanContents().
9669 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9670
9671 // To determine the "true" side of the select, we need to know the high bit
9672 // of the value returned by the setcc if it evaluates to true.
9673 // If the type of the setcc is i1, then the true case of the select is just
9674 // sext(i1 1), that is, -1.
9675 // If the type of the setcc is larger (say, i8) then the value of the high
9676 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9677 // of the appropriate width.
9678 SDValue ExtTrueVal = (SetCCWidth == 1)
9679 ? DAG.getAllOnesConstant(DL, VT)
9680 : DAG.getBoolConstant(true, DL, VT, N00VT);
9681 SDValue Zero = DAG.getConstant(0, DL, VT);
9682 if (SDValue SCC =
9683 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9684 return SCC;
9685
9686 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9687 EVT SetCCVT = getSetCCResultType(N00VT);
9688 // Don't do this transform for i1 because there's a select transform
9689 // that would reverse it.
9690 // TODO: We should not do this transform at all without a target hook
9691 // because a sext is likely cheaper than a select?
9692 if (SetCCVT.getScalarSizeInBits() != 1 &&
9693 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9694 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9695 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9696 }
9697 }
9698 }
9699
9700 // fold (sext x) -> (zext x) if the sign bit is known zero.
9701 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9702 DAG.SignBitIsZero(N0))
9703 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9704
9705 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9706 return NewVSel;
9707
9708 // Eliminate this sign extend by doing a negation in the destination type:
9709 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9710 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9714 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9715 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9716 }
9717 // Eliminate this sign extend by doing a decrement in the destination type:
9718 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9719 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9723 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9724 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9725 }
9726
9727 return SDValue();
9728}
9729
9730// isTruncateOf - If N is a truncate of some other value, return true, record
9731// the value being truncated in Op and which of Op's bits are zero/one in Known.
9732// This function computes KnownBits to avoid a duplicated call to
9733// computeKnownBits in the caller.
9735 KnownBits &Known) {
9736 if (N->getOpcode() == ISD::TRUNCATE) {
9737 Op = N->getOperand(0);
9738 Known = DAG.computeKnownBits(Op);
9739 return true;
9740 }
9741
9742 if (N.getOpcode() != ISD::SETCC ||
9743 N.getValueType().getScalarType() != MVT::i1 ||
9744 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9745 return false;
9746
9747 SDValue Op0 = N->getOperand(0);
9748 SDValue Op1 = N->getOperand(1);
9749 assert(Op0.getValueType() == Op1.getValueType());
9750
9751 if (isNullOrNullSplat(Op0))
9752 Op = Op1;
9753 else if (isNullOrNullSplat(Op1))
9754 Op = Op0;
9755 else
9756 return false;
9757
9758 Known = DAG.computeKnownBits(Op);
9759
9760 return (Known.Zero | 1).isAllOnesValue();
9761}
9762
9763SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9764 SDValue N0 = N->getOperand(0);
9765 EVT VT = N->getValueType(0);
9766
9767 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9768 return Res;
9769
9770 // fold (zext (zext x)) -> (zext x)
9771 // fold (zext (aext x)) -> (zext x)
9772 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9773 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9774 N0.getOperand(0));
9775
9776 // fold (zext (truncate x)) -> (zext x) or
9777 // (zext (truncate x)) -> (truncate x)
9778 // This is valid when the truncated bits of x are already zero.
9779 SDValue Op;
9780 KnownBits Known;
9781 if (isTruncateOf(DAG, N0, Op, Known)) {
9782 APInt TruncatedBits =
9783 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9784 APInt(Op.getScalarValueSizeInBits(), 0) :
9785 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9787 std::min(Op.getScalarValueSizeInBits(),
9788 VT.getScalarSizeInBits()));
9789 if (TruncatedBits.isSubsetOf(Known.Zero))
9790 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9791 }
9792
9793 // fold (zext (truncate x)) -> (and x, mask)
9794 if (N0.getOpcode() == ISD::TRUNCATE) {
9795 // fold (zext (truncate (load x))) -> (zext (smaller load x))
9796 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9797 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9798 SDNode *oye = N0.getOperand(0).getNode();
9799 if (NarrowLoad.getNode() != N0.getNode()) {
9800 CombineTo(N0.getNode(), NarrowLoad);
9801 // CombineTo deleted the truncate, if needed, but not what's under it.
9802 AddToWorklist(oye);
9803 }
9804 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9805 }
9806
9807 EVT SrcVT = N0.getOperand(0).getValueType();
9808 EVT MinVT = N0.getValueType();
9809
9810 // Try to mask before the extension to avoid having to generate a larger mask,
9811 // possibly over several sub-vectors.
9812 if (SrcVT.bitsLT(VT) && VT.isVector()) {
9813 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9815 SDValue Op = N0.getOperand(0);
9816 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9817 AddToWorklist(Op.getNode());
9818 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9819 // Transfer the debug info; the new node is equivalent to N0.
9820 DAG.transferDbgValues(N0, ZExtOrTrunc);
9821 return ZExtOrTrunc;
9822 }
9823 }
9824
9825 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9826 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9827 AddToWorklist(Op.getNode());
9828 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9829 // We may safely transfer the debug info describing the truncate node over
9830 // to the equivalent and operation.
9831 DAG.transferDbgValues(N0, And);
9832 return And;
9833 }
9834 }
9835
9836 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9837 // if either of the casts is not free.
9838 if (N0.getOpcode() == ISD::AND &&
9839 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9840 N0.getOperand(1).getOpcode() == ISD::Constant &&
9842 N0.getValueType()) ||
9843 !TLI.isZExtFree(N0.getValueType(), VT))) {
9844 SDValue X = N0.getOperand(0).getOperand(0);
9845 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9846 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9847 Mask = Mask.zext(VT.getSizeInBits());
9848 SDLoc DL(N);
9849 return DAG.getNode(ISD::AND, DL, VT,
9850 X, DAG.getConstant(Mask, DL, VT));
9851 }
9852
9853 // Try to simplify (zext (load x)).
9854 if (SDValue foldedExt =
9855 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9857 return foldedExt;
9858
9859 // fold (zext (load x)) to multiple smaller zextloads.
9860 // Only on illegal but splittable vectors.
9861 if (SDValue ExtLoad = CombineExtLoad(N))
9862 return ExtLoad;
9863
9864 // fold (zext (and/or/xor (load x), cst)) ->
9865 // (and/or/xor (zextload x), (zext cst))
9866 // Unless (and (load x) cst) will match as a zextload already and has
9867 // additional users.
9868 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9869 N0.getOpcode() == ISD::XOR) &&
9870 isa<LoadSDNode>(N0.getOperand(0)) &&
9871 N0.getOperand(1).getOpcode() == ISD::Constant &&
9872 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9873 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9874 EVT MemVT = LN00->getMemoryVT();
9875 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9876 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9877 bool DoXform = true;
9879 if (!N0.hasOneUse()) {
9880 if (N0.getOpcode() == ISD::AND) {
9881 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9882 EVT LoadResultTy = AndC->getValueType(0);
9883 EVT ExtVT;
9884 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9885 DoXform = false;
9886 }
9887 }
9888 if (DoXform)
9889 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9890 ISD::ZERO_EXTEND, SetCCs, TLI);
9891 if (DoXform) {
9892 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9893 LN00->getChain(), LN00->getBasePtr(),
9894 LN00->getMemoryVT(),
9895 LN00->getMemOperand());
9896 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9897 Mask = Mask.zext(VT.getSizeInBits());
9898 SDLoc DL(N);
9899 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9900 ExtLoad, DAG.getConstant(Mask, DL, VT));
9901 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9902 bool NoReplaceTruncAnd = !N0.hasOneUse();
9903 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9904 CombineTo(N, And);
9905 // If N0 has multiple uses, change other uses as well.
9906 if (NoReplaceTruncAnd) {
9907 SDValue TruncAnd =
9908 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9909 CombineTo(N0.getNode(), TruncAnd);
9910 }
9911 if (NoReplaceTrunc) {
9912 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9913 } else {
9914 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9915 LN00->getValueType(0), ExtLoad);
9916 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9917 }
9918 return SDValue(N,0); // Return N so it doesn't get rechecked!
9919 }
9920 }
9921 }
9922
9923 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9924 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9925 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9926 return ZExtLoad;
9927
9928 // Try to simplify (zext (zextload x)).
9929 if (SDValue foldedExt = tryToFoldExtOfExtload(
9930 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9931 return foldedExt;
9932
9933 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9934 return V;
9935
9936 if (N0.getOpcode() == ISD::SETCC) {
9937 // Only do this before legalize for now.
9938 if (!LegalOperations && VT.isVector() &&
9940 EVT N00VT = N0.getOperand(0).getValueType();
9941 if (getSetCCResultType(N00VT) == N0.getValueType())
9942 return SDValue();
9943
9944 // We know that the # elements of the results is the same as the #
9945 // elements of the compare (and the # elements of the compare result for
9946 // that matter). Check to see that they are the same size. If so, we know
9947 // that the element size of the sext'd result matches the element size of
9948 // the compare operands.
9949 SDLoc DL(N);
9950 SDValue VecOnes = DAG.getConstant(1, DL, VT);
9951 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9952 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9953 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9954 N0.getOperand(1), N0.getOperand(2));
9955 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9956 }
9957
9958 // If the desired elements are smaller or larger than the source
9959 // elements we can use a matching integer vector type and then
9960 // truncate/sign extend.
9961 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9962 SDValue VsetCC =
9963 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9964 N0.getOperand(1), N0.getOperand(2));
9965 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9966 VecOnes);
9967 }
9968
9969 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9970 SDLoc DL(N);
9971 if (SDValue SCC = SimplifySelectCC(
9972 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9973 DAG.getConstant(0, DL, VT),
9974 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9975 return SCC;
9976 }
9977
9978 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9979 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9980 isa<ConstantSDNode>(N0.getOperand(1)) &&
9982 N0.hasOneUse()) {
9983 SDValue ShAmt = N0.getOperand(1);
9984 if (N0.getOpcode() == ISD::SHL) {
9985 SDValue InnerZExt = N0.getOperand(0);
9986 // If the original shl may be shifting out bits, do not perform this
9987 // transformation.
9988 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9989 InnerZExt.getOperand(0).getValueSizeInBits();
9990 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
9991 return SDValue();
9992 }
9993
9994 SDLoc DL(N);
9995
9996 // Ensure that the shift amount is wide enough for the shifted value.
9997 if (VT.getSizeInBits() >= 256)
9998 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9999
10000 return DAG.getNode(N0.getOpcode(), DL, VT,
10001 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10002 ShAmt);
10003 }
10004
10005 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10006 return NewVSel;
10007
10008 return SDValue();
10009}
10010
10011SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10012 SDValue N0 = N->getOperand(0);
10013 EVT VT = N->getValueType(0);
10014
10015 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10016 return Res;
10017
10018 // fold (aext (aext x)) -> (aext x)
10019 // fold (aext (zext x)) -> (zext x)
10020 // fold (aext (sext x)) -> (sext x)
10021 if (N0.getOpcode() == ISD::ANY_EXTEND ||
10022 N0.getOpcode() == ISD::ZERO_EXTEND ||
10024 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10025
10026 // fold (aext (truncate (load x))) -> (aext (smaller load x))
10027 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10028 if (N0.getOpcode() == ISD::TRUNCATE) {
10029 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10030 SDNode *oye = N0.getOperand(0).getNode();
10031 if (NarrowLoad.getNode() != N0.getNode()) {
10032 CombineTo(N0.getNode(), NarrowLoad);
10033 // CombineTo deleted the truncate, if needed, but not what's under it.
10034 AddToWorklist(oye);
10035 }
10036 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10037 }
10038 }
10039
10040 // fold (aext (truncate x))
10041 if (N0.getOpcode() == ISD::TRUNCATE)
10042 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10043
10044 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10045 // if the trunc is not free.
10046 if (N0.getOpcode() == ISD::AND &&
10047 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10048 N0.getOperand(1).getOpcode() == ISD::Constant &&
10050 N0.getValueType())) {
10051 SDLoc DL(N);
10052 SDValue X = N0.getOperand(0).getOperand(0);
10053 X = DAG.getAnyExtOrTrunc(X, DL, VT);
10054 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
10055 Mask = Mask.zext(VT.getSizeInBits());
10056 return DAG.getNode(ISD::AND, DL, VT,
10057 X, DAG.getConstant(Mask, DL, VT));
10058 }
10059
10060 // fold (aext (load x)) -> (aext (truncate (extload x)))
10061 // None of the supported targets knows how to perform load and any_ext
10062 // on vectors in one instruction. We only perform this transformation on
10063 // scalars.
10064 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10066 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10067 bool DoXform = true;
10069 if (!N0.hasOneUse())
10070 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10071 TLI);
10072 if (DoXform) {
10073 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10074 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10075 LN0->getChain(),
10076 LN0->getBasePtr(), N0.getValueType(),
10077 LN0->getMemOperand());
10078 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10079 // If the load value is used only by N, replace it via CombineTo N.
10080 bool NoReplaceTrunc = N0.hasOneUse();
10081 CombineTo(N, ExtLoad);
10082 if (NoReplaceTrunc) {
10083 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10084 recursivelyDeleteUnusedNodes(LN0);
10085 } else {
10086 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10087 N0.getValueType(), ExtLoad);
10088 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10089 }
10090 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10091 }
10092 }
10093
10094 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10095 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10096 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
10097 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10098 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10099 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10100 ISD::LoadExtType ExtType = LN0->getExtensionType();
10101 EVT MemVT = LN0->getMemoryVT();
10102 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10103 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10104 VT, LN0->getChain(), LN0->getBasePtr(),
10105 MemVT, LN0->getMemOperand());
10106 CombineTo(N, ExtLoad);
10107 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10108 recursivelyDeleteUnusedNodes(LN0);
10109 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10110 }
10111 }
10112
10113 if (N0.getOpcode() == ISD::SETCC) {
10114 // For vectors:
10115 // aext(setcc) -> vsetcc
10116 // aext(setcc) -> truncate(vsetcc)
10117 // aext(setcc) -> aext(vsetcc)
10118 // Only do this before legalize for now.
10119 if (VT.isVector() && !LegalOperations) {
10120 EVT N00VT = N0.getOperand(0).getValueType();
10121 if (getSetCCResultType(N00VT) == N0.getValueType())
10122 return SDValue();
10123
10124 // We know that the # elements of the results is the same as the
10125 // # elements of the compare (and the # elements of the compare result
10126 // for that matter). Check to see that they are the same size. If so,
10127 // we know that the element size of the sext'd result matches the
10128 // element size of the compare operands.
10129 if (VT.getSizeInBits() == N00VT.getSizeInBits())
10130 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10131 N0.getOperand(1),
10132 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10133
10134 // If the desired elements are smaller or larger than the source
10135 // elements we can use a matching integer vector type and then
10136 // truncate/any extend
10137 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10138 SDValue VsetCC =
10139 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10140 N0.getOperand(1),
10141 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10142 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10143 }
10144
10145 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10146 SDLoc DL(N);
10147 if (SDValue SCC = SimplifySelectCC(
10148 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10149 DAG.getConstant(0, DL, VT),
10150 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10151 return SCC;
10152 }
10153
10154 return SDValue();
10155}
10156
10157SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10158 unsigned Opcode = N->getOpcode();
10159 SDValue N0 = N->getOperand(0);
10160 SDValue N1 = N->getOperand(1);
10161 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10162
10163 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10164 if (N0.getOpcode() == Opcode &&
10165 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10166 return N0;
10167
10168 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10169 N0.getOperand(0).getOpcode() == Opcode) {
10170 // We have an assert, truncate, assert sandwich. Make one stronger assert
10171 // by asserting on the smallest asserted type to the larger source type.
10172 // This eliminates the later assert:
10173 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10174 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10175 SDValue BigA = N0.getOperand(0);
10176 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10177 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10178 "Asserting zero/sign-extended bits to a type larger than the "
10179 "truncated destination does not provide information");
10180
10181 SDLoc DL(N);
10182 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10183 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10184 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10185 BigA.getOperand(0), MinAssertVTVal);
10186 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10187 }
10188
10189 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10190 // than X. Just move the AssertZext in front of the truncate and drop the
10191 // AssertSExt.
10192 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10194 Opcode == ISD::AssertZext) {
10195 SDValue BigA = N0.getOperand(0);
10196 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10197 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10198 "Asserting zero/sign-extended bits to a type larger than the "
10199 "truncated destination does not provide information");
10200
10201 if (AssertVT.bitsLT(BigA_AssertVT)) {
10202 SDLoc DL(N);
10203 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10204 BigA.getOperand(0), N1);
10205 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10206 }
10207 }
10208
10209 return SDValue();
10210}
10211
10212/// If the result of a wider load is shifted to right of N bits and then
10213/// truncated to a narrower type and where N is a multiple of number of bits of
10214/// the narrower type, transform it to a narrower load from address + N / num of
10215/// bits of new type. Also narrow the load if the result is masked with an AND
10216/// to effectively produce a smaller type. If the result is to be extended, also
10217/// fold the extension to form a extending load.
10218SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10219 unsigned Opc = N->getOpcode();
10220
10222 SDValue N0 = N->getOperand(0);
10223 EVT VT = N->getValueType(0);
10224 EVT ExtVT = VT;
10225
10226 // This transformation isn't valid for vector loads.
10227 if (VT.isVector())
10228 return SDValue();
10229
10230 unsigned ShAmt = 0;
10231 bool HasShiftedOffset = false;
10232 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10233 // extended to VT.
10234 if (Opc == ISD::SIGN_EXTEND_INREG) {
10235 ExtType = ISD::SEXTLOAD;
10236 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10237 } else if (Opc == ISD::SRL) {
10238 // Another special-case: SRL is basically zero-extending a narrower value,
10239 // or it maybe shifting a higher subword, half or byte into the lowest
10240 // bits.
10241 ExtType = ISD::ZEXTLOAD;
10242 N0 = SDValue(N, 0);
10243
10244 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10245 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10246 if (!N01 || !LN0)
10247 return SDValue();
10248
10249 uint64_t ShiftAmt = N01->getZExtValue();
10250 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10251 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10252 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10253 else
10254 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10255 VT.getSizeInBits() - ShiftAmt);
10256 } else if (Opc == ISD::AND) {
10257 // An AND with a constant mask is the same as a truncate + zero-extend.
10258 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10259 if (!AndC)
10260 return SDValue();
10261
10262 const APInt &Mask = AndC->getAPIntValue();
10263 unsigned ActiveBits = 0;
10264 if (Mask.isMask()) {
10265 ActiveBits = Mask.countTrailingOnes();
10266 } else if (Mask.isShiftedMask()) {
10267 ShAmt = Mask.countTrailingZeros();
10268 APInt ShiftedMask = Mask.lshr(ShAmt);
10269 ActiveBits = ShiftedMask.countTrailingOnes();
10270 HasShiftedOffset = true;
10271 } else
10272 return SDValue();
10273
10274 ExtType = ISD::ZEXTLOAD;
10275 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10276 }
10277
10278 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10279 SDValue SRL = N0;
10280 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10281 ShAmt = ConstShift->getZExtValue();
10282 unsigned EVTBits = ExtVT.getSizeInBits();
10283 // Is the shift amount a multiple of size of VT?
10284 if ((ShAmt & (EVTBits-1)) == 0) {
10285 N0 = N0.getOperand(0);
10286 // Is the load width a multiple of size of VT?
10287 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10288 return SDValue();
10289 }
10290
10291 // At this point, we must have a load or else we can't do the transform.
10292 if (!isa<LoadSDNode>(N0)) return SDValue();
10293
10294 auto *LN0 = cast<LoadSDNode>(N0);
10295
10296 // Because a SRL must be assumed to *need* to zero-extend the high bits
10297 // (as opposed to anyext the high bits), we can't combine the zextload
10298 // lowering of SRL and an sextload.
10299 if (LN0->getExtensionType() == ISD::SEXTLOAD)
10300 return SDValue();
10301
10302 // If the shift amount is larger than the input type then we're not
10303 // accessing any of the loaded bytes. If the load was a zextload/extload
10304 // then the result of the shift+trunc is zero/undef (handled elsewhere).
10305 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10306 return SDValue();
10307
10308 // If the SRL is only used by a masking AND, we may be able to adjust
10309 // the ExtVT to make the AND redundant.
10310 SDNode *Mask = *(SRL->use_begin());
10311 if (Mask->getOpcode() == ISD::AND &&
10312 isa<ConstantSDNode>(Mask->getOperand(1))) {
10313 const APInt &ShiftMask =
10314 cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10315 if (ShiftMask.isMask()) {
10316 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10317 ShiftMask.countTrailingOnes());
10318 // If the mask is smaller, recompute the type.
10319 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10320 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10321 ExtVT = MaskedVT;
10322 }
10323 }
10324 }
10325 }
10326
10327 // If the load is shifted left (and the result isn't shifted back right),
10328 // we can fold the truncate through the shift.
10329 unsigned ShLeftAmt = 0;
10330 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10331 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10332 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10333 ShLeftAmt = N01->getZExtValue();
10334 N0 = N0.getOperand(0);
10335 }
10336 }
10337
10338 // If we haven't found a load, we can't narrow it.
10339 if (!isa<LoadSDNode>(N0))
10340 return SDValue();
10341
10342 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10343 if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10344 return SDValue();
10345
10346 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10347 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10348 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10349 return LVTStoreBits - EVTStoreBits - ShAmt;
10350 };
10351
10352 // For big endian targets, we need to adjust the offset to the pointer to
10353 // load the correct bytes.
10354 if (DAG.getDataLayout().isBigEndian())
10355 ShAmt = AdjustBigEndianShift(ShAmt);
10356
10357 EVT PtrType = N0.getOperand(1).getValueType();
10358 uint64_t PtrOff = ShAmt / 8;
10359 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10360 SDLoc DL(LN0);
10361 // The original load itself didn't wrap, so an offset within it doesn't.
10362 SDNodeFlags Flags;
10363 Flags.setNoUnsignedWrap(true);
10364 SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10365 PtrType, LN0->getBasePtr(),
10366 DAG.getConstant(PtrOff, DL, PtrType),
10367 Flags);
10368 AddToWorklist(NewPtr.getNode());
10369
10370 SDValue Load;
10371 if (ExtType == ISD::NON_EXTLOAD)
10372 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
10373 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10374 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10375 else
10376 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
10377 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10378 NewAlign, LN0->getMemOperand()->getFlags(),
10379 LN0->getAAInfo());
10380
10381 // Replace the old load's chain with the new load's chain.
10382 WorklistRemover DeadNodes(*this);
10383 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10384
10385 // Shift the result left, if we've swallowed a left shift.
10387 if (ShLeftAmt != 0) {
10388 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10389 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10390 ShImmTy = VT;
10391 // If the shift amount is as large as the result size (but, presumably,
10392 // no larger than the source) then the useful bits of the result are
10393 // zero; we can't simply return the shortened shift, because the result
10394 // of that operation is undefined.
10395 SDLoc DL(N0);
10396 if (ShLeftAmt >= VT.getSizeInBits())
10397 Result = DAG.getConstant(0, DL, VT);
10398 else
10399 Result = DAG.getNode(ISD::SHL, DL, VT,
10400 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10401 }
10402
10403 if (HasShiftedOffset) {
10404 // Recalculate the shift amount after it has been altered to calculate
10405 // the offset.
10406 if (DAG.getDataLayout().isBigEndian())
10407 ShAmt = AdjustBigEndianShift(ShAmt);
10408
10409 // We're using a shifted mask, so the load now has an offset. This means
10410 // that data has been loaded into the lower bytes than it would have been
10411 // before, so we need to shl the loaded data into the correct position in the
10412 // register.
10413 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10414 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10415 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10416 }
10417
10418 // Return the new loaded value.
10419 return Result;
10420}
10421
/// Combine a SIGN_EXTEND_INREG node.
///
/// Operand 0 (N0) is the value being extended; operand 1 (N1) is a VTSDNode
/// naming the type whose sign bit is replicated into the upper bits of the
/// result type VT. The folds below are attempted in source order and the first
/// one that applies returns its replacement. An empty SDValue is returned when
/// no fold applies. The two load folds call CombineTo themselves and then
/// return SDValue(N, 0).
///
/// NOTE(review): every line of this listing starts with an embedded line
/// number, and that numbering skips 10434, 10461-10463, 10466-10467, 10486,
/// 10514 and 10530. The corresponding conditions are absent from the text
/// below; each gap is marked where it occurs. Restore them from the upstream
/// file before relying on this code.
10422SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10423 SDValue N0 = N->getOperand(0);
10424 SDValue N1 = N->getOperand(1);
10425 EVT VT = N->getValueType(0);
 // Note that this local variable is itself called EVT and therefore hides the
 // type name EVT for the remainder of the function. It holds the type that is
 // being sign-extended from.
10426 EVT EVT = cast<VTSDNode>(N1)->getVT();
 // Scalar bit widths of the result type and of the extended-from type.
10427 unsigned VTBits = VT.getScalarSizeInBits();
10428 unsigned EVTBits = EVT.getScalarSizeInBits();
10429
 // Extending an undefined value yields an undefined value.
10430 if (N0.isUndef())
10431 return DAG.getUNDEF(VT);
10432
10433 // fold (sext_in_reg c1) -> c1
 // NOTE(review): embedded line 10434, the condition guarding the return below,
 // is missing from this listing; presumably it tests whether N0 is a constant
 // -- confirm against upstream. As printed, the return is unconditional.
10435 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10436
10437 // If the input is already sign extended, just drop the extension.
 // A value sign-extended from EVTBits within VTBits has VTBits-EVTBits+1
 // identical top bits, which is the threshold tested here.
10438 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10439 return N0;
10440
10441 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
 // Only the case where this node's type is the narrower of the two is handled
 // here; the inner extension is then redundant and is skipped.
10442 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10443 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10444 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10445 N0.getOperand(0), N1);
10446
10447 // fold (sext_in_reg (sext x)) -> (sext x)
10448 // fold (sext_in_reg (aext x)) -> (sext x)
10449 // if x is small enough or if we know that x has more than 1 sign bit and the
10450 // sign_extend_inreg is extending from one of them.
10451 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10452 SDValue N00 = N0.getOperand(0);
10453 unsigned N00Bits = N00.getScalarValueSizeInBits();
 // The last clause only permits creating SIGN_EXTEND after operation
 // legalization when the target reports it legal for VT.
10454 if ((N00Bits <= EVTBits ||
10455 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10456 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10457 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10458 }
10459
10460 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
 // NOTE(review): embedded lines 10461-10463 (the start of this `if`,
 // presumably the opcode test on N0) and 10466-10467 (the rest of the inner
 // condition and the start of the return statement) are missing from this
 // listing; only fragments of the fold remain below.
10464 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10465 if (!LegalOperations ||
10468 N0.getOperand(0));
10469 }
10470
10471 // fold (sext_in_reg (zext x)) -> (sext x)
10472 // iff we are extending the source sign bit.
10473 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10474 SDValue N00 = N0.getOperand(0);
 // The zext source must be exactly EVTBits wide so that its top bit is the
 // bit this node replicates.
10475 if (N00.getScalarValueSizeInBits() == EVTBits &&
10476 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10477 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10478 }
10479
10480 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
 // The mask has only bit EVTBits-1 set, i.e. the sign bit of the narrow type.
10481 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10482 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10483
10484 // fold operands of sext_in_reg based on knowledge that the top bits are not
10485 // demanded.
 // NOTE(review): embedded line 10486, the condition guarding the return below,
 // is missing from this listing.
10487 return SDValue(N, 0);
10488
10489 // fold (sext_in_reg (load x)) -> (smaller sextload x)
10490 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10491 if (SDValue NarrowLoad = ReduceLoadWidth(N))
10492 return NarrowLoad;
10493
10494 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10495 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10496 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10497 if (N0.getOpcode() == ISD::SRL) {
 // Only constant shift amounts no larger than VTBits-EVTBits are considered.
10498 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10499 if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10500 // We can turn this into an SRA iff the input to the SRL is already sign
10501 // extended enough.
10502 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10503 if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10504 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10505 N0.getOperand(1));
10506 }
10507 }
10508
10509 // fold (sext_inreg (extload x)) -> (sextload x)
10510 // If sextload is not supported by target, we can only do the combine when
10511 // load has one use. Doing otherwise can block folding the extload with other
10512 // extends that the target does support.
 // NOTE(review): embedded line 10514, one conjunct of the condition below, is
 // missing from this listing.
10513 if (ISD::isEXTLoad(N0.getNode()) &&
10515 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10516 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
10517 N0.hasOneUse()) ||
10518 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10519 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
 // Re-issue the load as a SEXTLOAD with the same chain, base pointer, memory
 // type and memory operand.
10520 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10521 LN0->getChain(),
10522 LN0->getBasePtr(), EVT,
10523 LN0->getMemOperand());
 // Replace N with the new load's value, and the old load's value and chain
 // with the new load's value and chain.
10524 CombineTo(N, ExtLoad);
10525 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10526 AddToWorklist(ExtLoad.getNode());
10527 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10528 }
10529 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
 // NOTE(review): embedded line 10530, the opening of this `if` (presumably the
 // test that N0 is a zero-extending load), is missing from this listing.
10531 N0.hasOneUse() &&
10532 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10533 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10534 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10535 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10536 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10537 LN0->getChain(),
10538 LN0->getBasePtr(), EVT,
10539 LN0->getMemOperand());
 // Unlike the extload case above, the new load is not added to the worklist
 // here.
10540 CombineTo(N, ExtLoad);
10541 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10542 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10543 }
10544
10545 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
 // Only attempted when extending from at most 16 bits and N0 is an OR.
10546 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10547 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10548 N0.getOperand(1), false))
10549 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10550 BSwap, N1);
10551 }
10552
 // No fold applied.
10553 return SDValue();
10554}
10555
/// Combine a SIGN_EXTEND_VECTOR_INREG node.
///
/// Returns UNDEF of the result type when the operand is undef, otherwise the
/// result of tryToFoldExtendOfConstant when that helper produces a value.
/// An empty SDValue means no fold applied.
///
/// NOTE(review): the embedded line numbering skips 10566, so the condition
/// guarding `return SDValue(N, 0)` is absent from this listing. As printed,
/// that return is unconditional and the final return is unreachable; restore
/// the guard from the upstream file.
10556SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10557 SDValue N0 = N->getOperand(0);
10558 EVT VT = N->getValueType(0);
10559
 // Extending an undefined vector yields an undefined vector.
10560 if (N0.isUndef())
10561 return DAG.getUNDEF(VT);
10562
 // Let the shared helper fold an extend of a constant operand.
10563 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10564 return Res;
10565
 // NOTE(review): embedded line 10566 (the `if` guarding this return) is
 // missing here.
10567 return SDValue(N, 0);
10568
10569 return SDValue();
10570}
10571
/// Combine a ZERO_EXTEND_VECTOR_INREG node.
///
/// Returns UNDEF of the result type when the operand is undef, otherwise the
/// result of tryToFoldExtendOfConstant when that helper produces a value.
/// An empty SDValue means no fold applied.
///
/// NOTE(review): the embedded line numbering skips 10582, so the condition
/// guarding `return SDValue(N, 0)` is absent from this listing. As printed,
/// that return is unconditional and the final return is unreachable; restore
/// the guard from the upstream file.
10572SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10573 SDValue N0 = N->getOperand(0);
10574 EVT VT = N->getValueType(0);
10575
 // Extending an undefined vector yields an undefined vector.
10576 if (N0.isUndef())
10577 return DAG.getUNDEF(VT);
10578
 // Let the shared helper fold an extend of a constant operand.
10579 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10580 return Res;
10581
 // NOTE(review): embedded line 10582 (the `if` guarding this return) is
 // missing here.
10583 return SDValue(N, 0);
10584
10585 return SDValue();
10586}
10587
/// Combine a TRUNCATE node.
///
/// N0 is the value being truncated, SrcVT its type and VT the narrower result
/// type. The folds below are attempted in source order and the first one that
/// applies returns its replacement; an empty SDValue means no fold applied.
/// Many folds are gated on the combiner phase through LegalTypes,
/// LegalOperations and Level, and on target queries through TLI.
///
/// NOTE(review): every line of this listing starts with an embedded line
/// number, and that numbering skips 10603, 10714, 10730, 10745, 10761, 10775,
/// 10790-10791, 10800, 10823, 10835 and 10863. The corresponding code is
/// absent from the text below; each gap is marked where it occurs. Restore the
/// missing lines from the upstream file before relying on this code.
10588SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10589 SDValue N0 = N->getOperand(0);
10590 EVT VT = N->getValueType(0);
10591 EVT SrcVT = N0.getValueType();
 // Endianness decides which narrow vector element holds the low bits of a
 // wider element in the extract folds below.
10592 bool isLE = DAG.getDataLayout().isLittleEndian();
10593
10594 // noop truncate
10595 if (SrcVT == VT)
10596 return N0;
10597
10598 // fold (truncate (truncate x)) -> (truncate x)
10599 if (N0.getOpcode() == ISD::TRUNCATE)
10600 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10601
10602 // fold (truncate c1) -> c1
 // NOTE(review): embedded line 10603, the `if (...) {` that opens this block
 // (presumably a test that N0 is a constant), is missing from this listing.
 // The `C.getNode() != N` check avoids returning N as its own replacement.
10604 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10605 if (C.getNode() != N)
10606 return C;
10607 }
10608
10609 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10610 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10611 N0.getOpcode() == ISD::SIGN_EXTEND ||
10612 N0.getOpcode() == ISD::ANY_EXTEND) {
10613 // if the source is smaller than the dest, we still need an extend.
10614 if (N0.getOperand(0).getValueType().bitsLT(VT))
10615 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10616 // if the source is larger than the dest, then we only need the truncate.
10617 if (N0.getOperand(0).getValueType().bitsGT(VT))
10618 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10619 // if the source and dest are the same type, we can drop both the extend
10620 // and the truncate.
10621 return N0.getOperand(0);
10622 }
10623
10624 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10625 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10626 return SDValue();
10627
10628 // Fold extract-and-trunc into a narrow extract. For example:
10629 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10630 // i32 y = TRUNCATE(i64 x)
10631 // -- becomes --
10632 // v16i8 b = BITCAST (v2i64 val)
10633 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10634 //
10635 // Note: We only run this optimization after type legalization (which often
10636 // creates this pattern) and before operation legalization after which
10637 // we need to be more careful about the vector instructions that we generate.
10638 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10639 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10640 EVT VecTy = N0.getOperand(0).getValueType();
10641 EVT ExTy = N0.getValueType();
10642 EVT TrTy = N->getValueType(0);
10643
 // SizeRatio is how many TrTy-sized pieces fit in one extracted element; the
 // bitcast vector type NVT therefore has SizeRatio times as many elements.
10644 unsigned NumElem = VecTy.getVectorNumElements();
10645 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10646
10647 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10648 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10649
 // Only a constant element index and a legal NVT are handled.
10650 SDValue EltNo = N0->getOperand(1);
10651 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10652 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10653 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
 // Pick the first narrow piece of the wide element on little-endian targets
 // and the last one otherwise.
10654 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10655
10656 SDLoc DL(N);
10657 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10658 DAG.getBitcast(NVT, N0.getOperand(0)),
10659 DAG.getConstant(Index, DL, IndexTy));
10660 }
10661 }
10662
10663 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
 // Requires a single-use select and a truncate the target reports as free.
10664 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10665 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10666 TLI.isTruncateFree(SrcVT, VT)) {
10667 SDLoc SL(N0);
10668 SDValue Cond = N0.getOperand(0);
10669 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10670 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10671 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10672 }
10673 }
10674
10675 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
10676 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10677 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10678 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10679 SDValue Amt = N0.getOperand(1);
10680 KnownBits Known = DAG.computeKnownBits(Amt);
10681 unsigned Size = VT.getScalarSizeInBits();
 // The shift amount may have at most Log2_32(Size) possibly-nonzero low
 // bits; all bits above that are known zero.
10682 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10683 SDLoc SL(N);
10684 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10685
10686 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
 // Convert the amount to the shift-amount type the target uses for VT.
10687 if (AmtVT != Amt.getValueType()) {
10688 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10689 AddToWorklist(Amt.getNode());
10690 }
10691 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10692 }
10693 }
10694
10695 // Attempt to pre-truncate BUILD_VECTOR sources.
 // Each scalar operand is truncated to VT's scalar type and the vector is
 // rebuilt in the narrow type.
10696 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10697 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10698 SDLoc DL(N);
10699 EVT SVT = VT.getScalarType();
10700 SmallVector<SDValue, 8> TruncOps;
10701 for (const SDValue &Op : N0->op_values()) {
10702 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10703 TruncOps.push_back(TruncOp);
10704 }
10705 return DAG.getBuildVector(VT, DL, TruncOps);
10706 }
10707
10708 // Fold a series of buildvector, bitcast, and truncate if possible.
10709 // For example fold
10710 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10711 // (2xi32 (buildvector x, y)).
 // NOTE(review): embedded line 10714, one conjunct of the condition below, is
 // missing from this listing; the body treats N0's operand as a BUILD_VECTOR,
 // so presumably that is what the missing line tests -- confirm.
10712 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10713 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10715 N0.getOperand(0).hasOneUse()) {
10716 SDValue BuildVect = N0.getOperand(0);
10717 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10718 EVT TruncVecEltTy = VT.getVectorElementType();
10719
10720 // Check that the element types match.
10721 if (BuildVectEltTy == TruncVecEltTy) {
10722 // Now we only need to compute the offset of the truncated elements.
10723 unsigned BuildVecNumElts = BuildVect.getNumOperands();
10724 unsigned TruncVecNumElts = VT.getVectorNumElements();
10725 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10726
10727 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10728 "Invalid number of elements");
10729
 // NOTE(review): embedded line 10730, the declaration of Opnds, is missing
 // from this listing. The loop keeps every TruncEltOffset-th operand.
10731 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10732 Opnds.push_back(BuildVect.getOperand(i));
10733
10734 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10735 }
10736 }
10737
10738 // See if we can simplify the input to this truncate through knowledge that
10739 // only the low bits are being used.
10740 // For example "trunc (or (shl x, 8), y)" // -> trunc y
10741 // Currently we only perform this optimization on scalars because vectors
10742 // may have different active low bits.
10743 if (!VT.isVector()) {
 // NOTE(review): embedded line 10745, the initializer of Mask, is missing
 // from this listing.
10744 APInt Mask =
10746 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10747 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10748 }
10749
10750 // fold (truncate (load x)) -> (smaller load x)
10751 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10752 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10753 if (SDValue Reduced = ReduceLoadWidth(N))
10754 return Reduced;
10755
10756 // Handle the case where the load remains an extending load even
10757 // after truncation.
10758 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10759 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
 // NOTE(review): embedded line 10761, the rest of this condition and its
 // opening brace, is missing from this listing.
10760 if (!LN0->isVolatile() &&
 // Rebuild the load with the same extension kind and memory type but
 // producing VT directly, then move chain users over to the new load.
10762 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10763 VT, LN0->getChain(), LN0->getBasePtr(),
10764 LN0->getMemoryVT(),
10765 LN0->getMemOperand());
10766 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10767 return NewLoad;
10768 }
10769 }
10770 }
10771
10772 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10773 // where ... are all 'undef'.
10774 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
 // NOTE(review): embedded line 10775, the declaration of VTs (used below),
 // is missing from this listing.
 // V and Idx record the last non-undef operand seen and its position;
 // NumDefs counts non-undef operands.
10776 SDValue V;
10777 unsigned Idx = 0;
10778 unsigned NumDefs = 0;
10779
10780 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10781 SDValue X = N0.getOperand(i);
10782 if (!X.isUndef()) {
10783 V = X;
10784 Idx = i;
10785 NumDefs++;
10786 }
10787 // Stop if more than one member is non-undef.
10788 if (NumDefs > 1)
10789 break;
 // NOTE(review): embedded lines 10790-10791, the start of the statement
 // that ends on the next line, are missing from this listing; presumably
 // it appends a vector type to VTs -- confirm.
10792 X.getValueType().getVectorNumElements()));
10793 }
10794
 // Every operand was undef, so the truncate is undef as well.
10795 if (NumDefs == 0)
10796 return DAG.getUNDEF(VT);
10797
10798 if (NumDefs == 1) {
10799 assert(V.getNode() && "The single defined operand is empty!");
 // NOTE(review): embedded line 10800, the declaration of Opnds, is missing
 // from this listing. The loop emits UNDEF for every position except Idx,
 // where it emits the truncate of the one defined operand.
10801 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10802 if (i != Idx) {
10803 Opnds.push_back(DAG.getUNDEF(VTs[i]));
10804 continue;
10805 }
10806 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10807 AddToWorklist(NV.getNode());
10808 Opnds.push_back(NV);
10809 }
10810 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10811 }
10812 }
10813
10814 // Fold truncate of a bitcast of a vector to an extract of the low vector
10815 // element.
10816 //
10817 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10818 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10819 SDValue VecSrc = N0.getOperand(0);
 // This SrcVT is the bitcast source's type and hides the function-level
 // SrcVT inside this block.
10820 EVT SrcVT = VecSrc.getValueType();
 // NOTE(review): embedded line 10823, the rest of this condition and its
 // opening brace, is missing from this listing.
10821 if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10822 (!LegalOperations ||
10824 SDLoc SL(N);
10825
10826 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
 // Element 0 is extracted on little-endian targets, the last element
 // otherwise.
10827 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10828 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10829 VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10830 }
10831 }
10832
10833 // Simplify the operands using demanded-bits information.
 // NOTE(review): embedded line 10835, the rest of this condition, is missing
 // from this listing.
10834 if (!VT.isVector() &&
10836 return SDValue(N, 0);
10837
10838 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10839 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10840 // When the adde's carry is not used.
10841 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10842 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10843 // We only do for addcarry before legalize operation
10844 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10845 TLI.isOperationLegal(N0.getOpcode(), VT))) {
10846 SDLoc SL(N);
10847 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10848 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
 // The narrowed node keeps the original type of its second (carry) result.
10849 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10850 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10851 }
10852
10853 // fold (truncate (extract_subvector(ext x))) ->
10854 // (extract_subvector x)
10855 // TODO: This can be generalized to cover cases where the truncate and extract
10856 // do not fully cancel each other out.
10857 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10858 SDValue N00 = N0.getOperand(0);
10859 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10860 N00.getOpcode() == ISD::ZERO_EXTEND ||
10861 N00.getOpcode() == ISD::ANY_EXTEND) {
 // NOTE(review): embedded line 10863, the right-hand side of this
 // comparison, is missing from this listing.
10862 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10864 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10865 N00.getOperand(0), N0.getOperand(1));
10866 }
10867 }
10868
10869 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10870 return NewVSel;
10871
10872 // Narrow a suitable binary operation with a non-opaque constant operand by
10873 // moving it ahead of the truncate. This is limited to pre-legalization
10874 // because targets may prefer a wider type during later combines and invert
10875 // this transform.
 // Every opcode listed shares the single body below; any other opcode falls
 // out of the switch without doing anything.
10876 switch (N0.getOpcode()) {
10877 case ISD::ADD:
10878 case ISD::SUB:
10879 case ISD::MUL:
10880 case ISD::AND:
10881 case ISD::OR:
10882 case ISD::XOR:
10883 if (!LegalOperations && N0.hasOneUse() &&
10884 (isConstantOrConstantVector(N0.getOperand(0), true) ||
10885 isConstantOrConstantVector(N0.getOperand(1), true))) {
10886 // TODO: We already restricted this to pre-legalization, but for vectors
10887 // we are extra cautious to not create an unsupported operation.
10888 // Target-specific changes are likely needed to avoid regressions here.
10889 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10890 SDLoc DL(N);
10891 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10892 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10893 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10894 }
10895 }
10896 }
10897
 // No fold applied.
10898 return SDValue();
10899}
10900
10901static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10902 SDValue Elt = N->getOperand(i);
10903 if (Elt.getOpcode() != ISD::MERGE_VALUES)
10904 return Elt.getNode();
10905 return Elt.getOperand(Elt.getResNo()).getNode();
10906}
10907
10908/// build_pair (load, load) -> load
10909/// if load locations are consecutive.
10910SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10911 assert(N->getOpcode() == ISD::BUILD_PAIR);
10912
10913 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10914 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10915
10916 // A BUILD_PAIR is always having the least significant part in elt 0 and the
10917 // most significant part in elt 1. So when combining into one large load, we
10918 // need to consider the endianness.
10919 if (DAG.getDataLayout().isBigEndian())
10920 std::swap(LD1, LD2);
10921
10922 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10923 LD1->getAddressSpace() != LD2->getAddressSpace())
10924 return SDValue();
10925 EVT LD1VT = LD1->getValueType(0);
10926 unsigned LD1Bytes = LD1VT.getStoreSize();
10927 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10928 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10929 unsigned Align = LD1->getAlignment();
10930 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10931 VT.getTypeForEVT(*DAG.getContext()));
10932
10933 if (NewAlign <= Align &&
10934 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10935 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10936 LD1->getPointerInfo(), Align);
10937 }
10938
10939 return SDValue();
10940}
10941
10942static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10943 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10944 // and Lo parts; on big-endian machines it doesn't.
10945 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10946}
10947
/// Turn an integer sign-bit logic operation on a bitcast FP value back into
/// the matching FP operation (fabs, fneg or fneg(fabs)).
///
/// N is a node whose operand 0 is an AND/XOR/OR of (bitcast fp X) with a
/// constant or constant splat. When the constant is the expected sign mask and
/// X already has N's result type, the FP form is returned and the
/// NumFPLogicOpsConv counter is incremented; otherwise an empty SDValue is
/// returned.
///
/// NOTE(review): the first line of this definition (embedded line 10948,
/// carrying the return type, the function name and the leading parameters) is
/// missing from this listing. The body uses `N` and `DAG`, and the call later
/// in this file is `foldBitcastedFPLogic(N, DAG, TLI)` with its result bound
/// to an SDValue. Restore the line from the upstream file.
10949 const TargetLowering &TLI) {
10950 // If this is not a bitcast to an FP type or if the target doesn't have
10951 // IEEE754-compliant FP logic, we're done.
10952 EVT VT = N->getValueType(0);
10953 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10954 return SDValue();
10955
10956 // TODO: Handle cases where the integer constant is a different scalar
10957 // bitwidth to the FP.
10958 SDValue N0 = N->getOperand(0);
10959 EVT SourceVT = N0.getValueType();
10960 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10961 return SDValue();
10962
 // Choose the FP opcode and the exact constant the logic op must use.
10963 unsigned FPOpcode;
10964 APInt SignMask;
10965 switch (N0.getOpcode()) {
10966 case ISD::AND:
 // AND with every bit except the sign bit clears the sign: fabs.
10967 FPOpcode = ISD::FABS;
10968 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10969 break;
10970 case ISD::XOR:
 // XOR with the sign bit flips the sign: fneg.
10971 FPOpcode = ISD::FNEG;
10972 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10973 break;
10974 case ISD::OR:
 // OR with the sign bit forces the sign on. FABS is created first and is
 // wrapped in FNEG further down.
10975 FPOpcode = ISD::FABS;
10976 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10977 break;
10978 default:
10979 return SDValue();
10980 }
10981
10982 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10983 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10984 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10985 // fneg (fabs X)
10986 SDValue LogicOp0 = N0.getOperand(0);
10987 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
 // The constant must be exactly the chosen mask, and the other operand must
 // be a bitcast whose source already has the result type VT.
10988 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10989 LogicOp0.getOpcode() == ISD::BITCAST &&
10990 LogicOp0.getOperand(0).getValueType() == VT) {
10991 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10992 NumFPLogicOpsConv++;
10993 if (N0.getOpcode() == ISD::OR)
10994 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10995 return FPOp;
10996 }
10997
 // Pattern did not match.
10998 return SDValue();
10999}
11000
11001SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11002 SDValue N0 = N->getOperand(0);
11003 EVT VT = N->getValueType(0);
11004
11005 if (N0.isUndef())
11006 return DAG.getUNDEF(VT);
11007
11008 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11009 // Only do this before legalize types, unless both types are integer and the
11010 // scalar type is legal. Only do this before legalize ops, since the target
11011 // maybe depending on the bitcast.
11012 // First check to see if this is all constant.
11013 // TODO: Support FP bitcasts after legalize types.
11014 if (VT.isVector() &&
11015 (!LegalTypes ||
11016 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11017 TLI.isTypeLegal(VT.getVectorElementType()))) &&
11018 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11019 cast<BuildVectorSDNode>(N0)->isConstant())
11020 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11022
11023 // If the input is a constant, let getNode fold it.
11024 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11025 // If we can't allow illegal operations, we need to check that this is just
11026 // a fp -> int or int -> conversion and that the resulting operation will
11027 // be legal.
11028 if (!LegalOperations ||
11029 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11031 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11032 TLI.isOperationLegal(ISD::Constant, VT))) {
11033 SDValue C = DAG.getBitcast(VT, N0);
11034 if (C.getNode() != N)
11035 return C;
11036 }
11037 }
11038
11039 // (conv (conv x, t1), t2) -> (conv x, t2)
11040 if (N0.getOpcode() == ISD::BITCAST)
11041 return DAG.getBitcast(VT, N0.getOperand(0));
11042
11043 // fold (conv (load x)) -> (load (conv*)x)
11044 // If the resultant load doesn't need a higher alignment than the original!
11045 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11046 // Do not remove the cast if the types differ in endian layout.
11048 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11049 // If the load is volatile, we only want to change the load type if the
11050 // resulting load is legal. Otherwise we might increase the number of
11051 // memory accesses. We don't care if the original type was legal or not
11052 // as we assume software couldn't rely on the number of accesses of an
11053 // illegal type.
11054 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
11055 TLI.isOperationLegal(ISD::LOAD, VT))) {
11056 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11057
11058 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11059 *LN0->getMemOperand())) {
11060 SDValue Load =
11061 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11062 LN0->getPointerInfo(), LN0->getAlignment(),
11063 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11064 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11065 return Load;
11066 }
11067 }
11068
11069 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11070 return V;
11071
11072 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11073 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11074 //
11075 // For ppc_fp128:
11076 // fold (bitcast (fneg x)) ->
11077 // flipbit = signbit
11078 // (xor (bitcast x) (build_pair flipbit, flipbit))
11079 //
11080 // fold (bitcast (fabs x)) ->
11081 // flipbit = (and (extract_element (bitcast x), 0), signbit)
11082 // (xor (bitcast x) (build_pair flipbit, flipbit))
11083 // This often reduces constant pool loads.
11084 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11085 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11086 N0.getNode()->hasOneUse() && VT.isInteger() &&
11087 !VT.isVector() && !N0.getValueType().isVector()) {
11088 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11089 AddToWorklist(NewConv.getNode());
11090
11091 SDLoc DL(N);
11092 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11093 assert(VT.getSizeInBits() == 128);
11094 SDValue SignBit = DAG.getConstant(
11096 SDValue FlipBit;
11097 if (N0.getOpcode() == ISD::FNEG) {
11098 FlipBit = SignBit;
11099 AddToWorklist(FlipBit.getNode());
11100 } else {
11101 assert(N0.getOpcode() == ISD::FABS);
11102 SDValue Hi =
11103 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11105 SDLoc(NewConv)));
11106 AddToWorklist(Hi.getNode());
11107 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11108 AddToWorklist(FlipBit.getNode());
11109 }
11110 SDValue FlipBits =
11111 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11112 AddToWorklist(FlipBits.getNode());
11113 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11114 }
11115 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11116 if (N0.getOpcode() == ISD::FNEG)
11117 return DAG.getNode(ISD::XOR, DL, VT,
11118 NewConv, DAG.getConstant(SignBit, DL, VT));
11119 assert(N0.getOpcode() == ISD::FABS);
11120 return DAG.getNode(ISD::AND, DL, VT,
11121 NewConv, DAG.getConstant(~SignBit, DL, VT));
11122 }
11123
11124 // fold (bitconvert (fcopysign cst, x)) ->
11125 // (or (and (bitconvert x), sign), (and cst, (not sign)))
11126 // Note that we don't handle (copysign x, cst) because this can always be
11127 // folded to an fneg or fabs.
11128 //
11129 // For ppc_fp128:
11130 // fold (bitcast (fcopysign cst, x)) ->
11131 // flipbit = (and (extract_element
11132 // (xor (bitcast cst), (bitcast x)), 0),
11133 // signbit)
11134 // (xor (bitcast cst) (build_pair flipbit, flipbit))
11135 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11136 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11137 VT.isInteger() && !VT.isVector()) {
11138 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11139 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11140 if (isTypeLegal(IntXVT)) {
11141 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11142 AddToWorklist(X.getNode());
11143
11144 // If X has a different width than the result/lhs, sext it or truncate it.
11145 unsigned VTWidth = VT.getSizeInBits();
11146 if (OrigXWidth < VTWidth) {
11147 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11148 AddToWorklist(X.getNode());
11149 } else if (OrigXWidth > VTWidth) {
11150 // To get the sign bit in the right place, we have to shift it right
11151 // before truncating.
11152 SDLoc DL(X);
11153 X = DAG.getNode(ISD::SRL, DL,
11154 X.getValueType(), X,
11155 DAG.getConstant(OrigXWidth-VTWidth, DL,
11156 X.getValueType()));
11157 AddToWorklist(X.getNode());
11158 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11159 AddToWorklist(X.getNode());
11160 }
11161
11162 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11163 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11164 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11165 AddToWorklist(Cst.getNode());
11166 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11167 AddToWorklist(X.getNode());
11168 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11169 AddToWorklist(XorResult.getNode());
11170 SDValue XorResult64 = DAG.getNode(
11171 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11173 SDLoc(XorResult)));
11174 AddToWorklist(XorResult64.getNode());
11175 SDValue FlipBit =
11176 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11177 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11178 AddToWorklist(FlipBit.getNode());
11179 SDValue FlipBits =
11180 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11181 AddToWorklist(FlipBits.getNode());
11182 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11183 }
11184 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11185 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11186 X, DAG.getConstant(SignBit, SDLoc(X), VT));
11187 AddToWorklist(X.getNode());
11188
11189 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11190 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11191 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11192 AddToWorklist(Cst.getNode());
11193
11194 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11195 }
11196 }
11197
11198 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11199 if (N0.getOpcode() == ISD::BUILD_PAIR)
11200 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11201 return CombineLD;
11202
11203 // Remove double bitcasts from shuffles - this is often a legacy of
11204 // XformToShuffleWithZero being used to combine bitmaskings (of
11205 // float vectors bitcast to integer vectors) into shuffles.
11206 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11207 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11208 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11211 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11212
11213 // If operands are a bitcast, peek through if it casts the original VT.
11214 // If operands are a constant, just bitcast back to original VT.
11215 auto PeekThroughBitcast = [&](SDValue Op) {
11216 if (Op.getOpcode() == ISD::BITCAST &&
11217 Op.getOperand(0).getValueType() == VT)
11218 return SDValue(Op.getOperand(0));
11219 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11221 return DAG.getBitcast(VT, Op);
11222 return SDValue();
11223 };
11224
11225 // FIXME: If either input vector is bitcast, try to convert the shuffle to
11226 // the result type of this bitcast. This would eliminate at least one
11227 // bitcast. See the transform in InstCombine.
11228 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11229 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11230 if (!(SV0 && SV1))
11231 return SDValue();
11232
11233 int MaskScale =
11235 SmallVector<int, 8> NewMask;
11236 for (int M : SVN->getMask())
11237 for (int i = 0; i != MaskScale; ++i)
11238 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11239
11240 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
11241 if (!LegalMask) {
11242 std::swap(SV0, SV1);
11244 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
11245 }
11246
11247 if (LegalMask)
11248 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
11249 }
11250
11251 return SDValue();
11252}
11253
11254SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11255 EVT VT = N->getValueType(0);
11256 return CombineConsecutiveLoads(N, VT);
11257}
11258
11259/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11260/// operands. DstEltVT indicates the destination element value type.
11261SDValue DAGCombiner::
11262ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11263 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11264
11265 // If this is already the right type, we're done.
11266 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11267
11268 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11269 unsigned DstBitSize = DstEltVT.getSizeInBits();
11270
11271 // If this is a conversion of N elements of one type to N elements of another
11272 // type, convert each element. This handles FP<->INT cases.
11273 if (SrcBitSize == DstBitSize) {
11275 for (SDValue Op : BV->op_values()) {
11276 // If the vector element type is not legal, the BUILD_VECTOR operands
11277 // are promoted and implicitly truncated. Make that explicit here.
11278 if (Op.getValueType() != SrcEltVT)
11279 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11280 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11281 AddToWorklist(Ops.back().getNode());
11282 }
11283 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11285 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11286 }
11287
11288 // Otherwise, we're growing or shrinking the elements. To avoid having to
11289 // handle annoying details of growing/shrinking FP values, we convert them to
11290 // int first.
11291 if (SrcEltVT.isFloatingPoint()) {
11292 // Convert the input float vector to a int vector where the elements are the
11293 // same sizes.
11294 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11295 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11296 SrcEltVT = IntVT;
11297 }
11298
11299 // Now we know the input is an integer vector. If the output is a FP type,
11300 // convert to integer first, then to FP of the right size.
11301 if (DstEltVT.isFloatingPoint()) {
11302 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11303 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11304
11305 // Next, convert to FP elements of the same size.
11306 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11307 }
11308
11309 SDLoc DL(BV);
11310
11311 // Okay, we know the src/dst types are both integers of differing types.
11312 // Handling growing first.
11313 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11314 if (SrcBitSize < DstBitSize) {
11315 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11316
11318 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11319 i += NumInputsPerOutput) {
11320 bool isLE = DAG.getDataLayout().isLittleEndian();
11321 APInt NewBits = APInt(DstBitSize, 0);
11322 bool EltIsUndef = true;
11323 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11324 // Shift the previously computed bits over.
11325 NewBits <<= SrcBitSize;
11326 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11327 if (Op.isUndef()) continue;
11328 EltIsUndef = false;
11329
11330 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11331 zextOrTrunc(SrcBitSize).zext(DstBitSize);
11332 }
11333
11334 if (EltIsUndef)
11335 Ops.push_back(DAG.getUNDEF(DstEltVT));
11336 else
11337 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11338 }
11339
11340 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11341 return DAG.getBuildVector(VT, DL, Ops);
11342 }
11343
11344 // Finally, this must be the case where we are shrinking elements: each input
11345 // turns into multiple outputs.
11346 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11347 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11348 NumOutputsPerInput*BV->getNumOperands());
11350
11351 for (const SDValue &Op : BV->op_values()) {
11352 if (Op.isUndef()) {
11353 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11354 continue;
11355 }
11356
11357 APInt OpVal = cast<ConstantSDNode>(Op)->
11358 getAPIntValue().zextOrTrunc(SrcBitSize);
11359
11360 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11361 APInt ThisVal = OpVal.trunc(DstBitSize);
11362 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11363 OpVal.lshrInPlace(DstBitSize);
11364 }
11365
11366 // For big endian targets, swap the order of the pieces of each element.
11367 if (DAG.getDataLayout().isBigEndian())
11368 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11369 }
11370
11371 return DAG.getBuildVector(VT, DL, Ops);
11372}
11373
11374static bool isContractable(SDNode *N) {
11375 SDNodeFlags F = N->getFlags();
11376 return F.hasAllowContract() || F.hasAllowReassociation();
11377}
11378
11379/// Try to perform FMA combining on a given FADD node.
11380SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11381 SDValue N0 = N->getOperand(0);
11382 SDValue N1 = N->getOperand(1);
11383 EVT VT = N->getValueType(0);
11384 SDLoc SL(N);
11385
11386 const TargetOptions &Options = DAG.getTarget().Options;
11387
11388 // Floating-point multiply-add with intermediate rounding.
11389 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11390
11391 // Floating-point multiply-add without intermediate rounding.
11392 bool HasFMA =
11394 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11395
11396 // No valid opcode, do not combine.
11397 if (!HasFMAD && !HasFMA)
11398 return SDValue();
11399
11400 SDNodeFlags Flags = N->getFlags();
11401 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11402 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11403 CanFuse || HasFMAD);
11404 // If the addition is not contractable, do not combine.
11405 if (!AllowFusionGlobally && !isContractable(N))
11406 return SDValue();
11407
11409 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11410 return SDValue();
11411
11412 // Always prefer FMAD to FMA for precision.
11413 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11415
11416 // Is the node an FMUL and contractable either due to global flags or
11417 // SDNodeFlags.
11418 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11419 if (N.getOpcode() != ISD::FMUL)
11420 return false;
11421 return AllowFusionGlobally || isContractable(N.getNode());
11422 };
11423 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11424 // prefer to fold the multiply with fewer uses.
11425 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
11426 if (N0.getNode()->use_size() > N1.getNode()->use_size())
11427 std::swap(N0, N1);
11428 }
11429
11430 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
11431 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11432 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11433 N0.getOperand(0), N0.getOperand(1), N1, Flags);
11434 }
11435
11436 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11437 // Note: Commutes FADD operands.
11438 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11439 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11440 N1.getOperand(0), N1.getOperand(1), N0, Flags);
11441 }
11442
11443 // Look through FP_EXTEND nodes to do more combining.
11444
11445 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
11446 if (N0.getOpcode() == ISD::FP_EXTEND) {
11447 SDValue N00 = N0.getOperand(0);
11448 if (isContractableFMUL(N00) &&
11449 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11450 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11451 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11452 N00.getOperand(0)),
11453 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11454 N00.getOperand(1)), N1, Flags);
11455 }
11456 }
11457
11458 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11459 // Note: Commutes FADD operands.
11460 if (N1.getOpcode() == ISD::FP_EXTEND) {
11461 SDValue N10 = N1.getOperand(0);
11462 if (isContractableFMUL(N10) &&
11463 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11464 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11465 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11466 N10.getOperand(0)),
11467 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11468 N10.getOperand(1)), N0, Flags);
11469 }
11470 }
11471
11472 // More folding opportunities when target permits.
11473 if (Aggressive) {
11474 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
11475 if (CanFuse &&
11476 N0.getOpcode() == PreferredFusedOpcode &&
11477 N0.getOperand(2).getOpcode() == ISD::FMUL &&
11478 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11479 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11480 N0.getOperand(0), N0.getOperand(1),
11481 DAG.getNode(PreferredFusedOpcode, SL, VT,
11482 N0.getOperand(2).getOperand(0),
11483 N0.getOperand(2).getOperand(1),
11484 N1, Flags), Flags);
11485 }
11486
11487 // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
11488 if (CanFuse &&
11489 N1->getOpcode() == PreferredFusedOpcode &&
11490 N1.getOperand(2).getOpcode() == ISD::FMUL &&
11491 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11492 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11493 N1.getOperand(0), N1.getOperand(1),
11494 DAG.getNode(PreferredFusedOpcode, SL, VT,
11495 N1.getOperand(2).getOperand(0),
11496 N1.getOperand(2).getOperand(1),
11497 N0, Flags), Flags);
11498 }
11499
11500
11501 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11502 // -> (fma x, y, (fma (fpext u), (fpext v), z))
11503 auto FoldFAddFMAFPExtFMul = [&] (
11505 SDNodeFlags Flags) {
11506 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11507 DAG.getNode(PreferredFusedOpcode, SL, VT,
11508 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11509 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11510 Z, Flags), Flags);
11511 };
11512 if (N0.getOpcode() == PreferredFusedOpcode) {
11513 SDValue N02 = N0.getOperand(2);
11514 if (N02.getOpcode() == ISD::FP_EXTEND) {
11515 SDValue N020 = N02.getOperand(0);
11516 if (isContractableFMUL(N020) &&
11517 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11518 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11519 N020.getOperand(0), N020.getOperand(1),
11520 N1, Flags);
11521 }
11522 }
11523 }
11524
11525 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11526 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11527 // FIXME: This turns two single-precision and one double-precision
11528 // operation into two double-precision operations, which might not be
11529 // interesting for all targets, especially GPUs.
11530 auto FoldFAddFPExtFMAFMul = [&] (
11532 SDNodeFlags Flags) {
11533 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11534 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11535 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11536 DAG.getNode(PreferredFusedOpcode, SL, VT,
11537 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11538 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11539 Z, Flags), Flags);
11540 };
11541 if (N0.getOpcode() == ISD::FP_EXTEND) {
11542 SDValue N00 = N0.getOperand(0);
11543 if (N00.getOpcode() == PreferredFusedOpcode) {
11544 SDValue N002 = N00.getOperand(2);
11545 if (isContractableFMUL(N002) &&
11546 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11547 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11548 N002.getOperand(0), N002.getOperand(1),
11549 N1, Flags);
11550 }
11551 }
11552 }
11553
11554 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
11555 // -> (fma y, z, (fma (fpext u), (fpext v), x))
11556 if (N1.getOpcode() == PreferredFusedOpcode) {
11557 SDValue N12 = N1.getOperand(2);
11558 if (N12.getOpcode() == ISD::FP_EXTEND) {
11559 SDValue N120 = N12.getOperand(0);
11560 if (isContractableFMUL(N120) &&
11561 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11562 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11563 N120.getOperand(0), N120.getOperand(1),
11564 N0, Flags);
11565 }
11566 }
11567 }
11568
11569 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
11570 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11571 // FIXME: This turns two single-precision and one double-precision
11572 // operation into two double-precision operations, which might not be
11573 // interesting for all targets, especially GPUs.
11574 if (N1.getOpcode() == ISD::FP_EXTEND) {
11575 SDValue N10 = N1.getOperand(0);
11576 if (N10.getOpcode() == PreferredFusedOpcode) {
11577 SDValue N102 = N10.getOperand(2);
11578 if (isContractableFMUL(N102) &&
11579 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11580 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11581 N102.getOperand(0), N102.getOperand(1),
11582 N0, Flags);
11583 }
11584 }
11585 }
11586 }
11587
11588 return SDValue();
11589}
11590
11591/// Try to perform FMA combining on a given FSUB node.
11592SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11593 SDValue N0 = N->getOperand(0);
11594 SDValue N1 = N->getOperand(1);
11595 EVT VT = N->getValueType(0);
11596 SDLoc SL(N);
11597
11598 const TargetOptions &Options = DAG.getTarget().Options;
11599 // Floating-point multiply-add with intermediate rounding.
11600 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11601
11602 // Floating-point multiply-add without intermediate rounding.
11603 bool HasFMA =
11605 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11606
11607 // No valid opcode, do not combine.
11608 if (!HasFMAD && !HasFMA)
11609 return SDValue();
11610
11611 const SDNodeFlags Flags = N->getFlags();
11612 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11613 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11614 CanFuse || HasFMAD);
11615
11616 // If the subtraction is not contractable, do not combine.
11617 if (!AllowFusionGlobally && !isContractable(N))
11618 return SDValue();
11619
11621 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11622 return SDValue();
11623
11624 // Always prefer FMAD to FMA for precision.
11625 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11627
11628 // Is the node an FMUL and contractable either due to global flags or
11629 // SDNodeFlags.
11630 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11631 if (N.getOpcode() != ISD::FMUL)
11632 return false;
11633 return AllowFusionGlobally || isContractable(N.getNode());
11634 };
11635
11636 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11637 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11638 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11639 N0.getOperand(0), N0.getOperand(1),
11640 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11641 }
11642
11643 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11644 // Note: Commutes FSUB operands.
11645 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11646 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11647 DAG.getNode(ISD::FNEG, SL, VT,
11648 N1.getOperand(0)),
11649 N1.getOperand(1), N0, Flags);
11650 }
11651
11652 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
11653 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11654 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11655 SDValue N00 = N0.getOperand(0).getOperand(0);
11656 SDValue N01 = N0.getOperand(0).getOperand(1);
11657 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11658 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11659 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11660 }
11661
11662 // Look through FP_EXTEND nodes to do more combining.
11663
11664 // fold (fsub (fpext (fmul x, y)), z)
11665 // -> (fma (fpext x), (fpext y), (fneg z))
11666 if (N0.getOpcode() == ISD::FP_EXTEND) {
11667 SDValue N00 = N0.getOperand(0);
11668 if (isContractableFMUL(N00) &&
11669 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11670 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11671 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11672 N00.getOperand(0)),
11673 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11674 N00.getOperand(1)),
11675 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11676 }
11677 }
11678
11679 // fold (fsub x, (fpext (fmul y, z)))
11680 // -> (fma (fneg (fpext y)), (fpext z), x)
11681 // Note: Commutes FSUB operands.
11682 if (N1.getOpcode() == ISD::FP_EXTEND) {
11683 SDValue N10 = N1.getOperand(0);
11684 if (isContractableFMUL(N10) &&
11685 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11686 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11687 DAG.getNode(ISD::FNEG, SL, VT,
11688 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11689 N10.getOperand(0))),
11690 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11691 N10.getOperand(1)),
11692 N0, Flags);
11693 }
11694 }
11695
11696 // fold (fsub (fpext (fneg (fmul, x, y))), z)
11697 // -> (fneg (fma (fpext x), (fpext y), z))
11698 // Note: This could be removed with appropriate canonicalization of the
11699 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11700 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11701 // from implementing the canonicalization in visitFSUB.
11702 if (N0.getOpcode() == ISD::FP_EXTEND) {
11703 SDValue N00 = N0.getOperand(0);
11704 if (N00.getOpcode() == ISD::FNEG) {
11705 SDValue N000 = N00.getOperand(0);
11706 if (isContractableFMUL(N000) &&
11707 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11708 return DAG.getNode(ISD::FNEG, SL, VT,
11709 DAG.getNode(PreferredFusedOpcode, SL, VT,
11710 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11711 N000.getOperand(0)),
11712 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11713 N000.getOperand(1)),
11714 N1, Flags));
11715 }
11716 }
11717 }
11718
11719 // fold (fsub (fneg (fpext (fmul, x, y))), z)
11720 // -> (fneg (fma (fpext x)), (fpext y), z)
11721 // Note: This could be removed with appropriate canonicalization of the
11722 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11723 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11724 // from implementing the canonicalization in visitFSUB.
11725 if (N0.getOpcode() == ISD::FNEG) {
11726 SDValue N00 = N0.getOperand(0);
11727 if (N00.getOpcode() == ISD::FP_EXTEND) {
11728 SDValue N000 = N00.getOperand(0);
11729 if (isContractableFMUL(N000) &&
11730 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11731 return DAG.getNode(ISD::FNEG, SL, VT,
11732 DAG.getNode(PreferredFusedOpcode, SL, VT,
11733 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11734 N000.getOperand(0)),
11735 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11736 N000.getOperand(1)),
11737 N1, Flags));
11738 }
11739 }
11740 }
11741
11742 // More folding opportunities when target permits.
11743 if (Aggressive) {
11744 // fold (fsub (fma x, y, (fmul u, v)), z)
11745 // -> (fma x, y (fma u, v, (fneg z)))
11746 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11747 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11748 N0.getOperand(2)->hasOneUse()) {
11749 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11750 N0.getOperand(0), N0.getOperand(1),
11751 DAG.getNode(PreferredFusedOpcode, SL, VT,
11752 N0.getOperand(2).getOperand(0),
11753 N0.getOperand(2).getOperand(1),
11754 DAG.getNode(ISD::FNEG, SL, VT,
11755 N1), Flags), Flags);
11756 }
11757
11758 // fold (fsub x, (fma y, z, (fmul u, v)))
11759 // -> (fma (fneg y), z, (fma (fneg u), v, x))
11760 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11761 isContractableFMUL(N1.getOperand(2))) {
11762 SDValue N20 = N1.getOperand(2).getOperand(0);
11763 SDValue N21 = N1.getOperand(2).getOperand(1);
11764 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11765 DAG.getNode(ISD::FNEG, SL, VT,
11766 N1.getOperand(0)),
11767 N1.getOperand(1),
11768 DAG.getNode(PreferredFusedOpcode, SL, VT,
11769 DAG.getNode(ISD::FNEG, SL, VT, N20),
11770 N21, N0, Flags), Flags);
11771 }
11772
11773
11774 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11775 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
11776 if (N0.getOpcode() == PreferredFusedOpcode) {
11777 SDValue N02 = N0.getOperand(2);
11778 if (N02.getOpcode() == ISD::FP_EXTEND) {
11779 SDValue N020 = N02.getOperand(0);
11780 if (isContractableFMUL(N020) &&
11781 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11782 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11783 N0.getOperand(0), N0.getOperand(1),
11784 DAG.getNode(PreferredFusedOpcode, SL, VT,
11785 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11786 N020.getOperand(0)),
11787 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11788 N020.getOperand(1)),
11789 DAG.getNode(ISD::FNEG, SL, VT,
11790 N1), Flags), Flags);
11791 }
11792 }
11793 }
11794
11795 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11796 // -> (fma (fpext x), (fpext y),
11797 // (fma (fpext u), (fpext v), (fneg z)))
11798 // FIXME: This turns two single-precision and one double-precision
11799 // operation into two double-precision operations, which might not be
11800 // interesting for all targets, especially GPUs.
11801 if (N0.getOpcode() == ISD::FP_EXTEND) {
11802 SDValue N00 = N0.getOperand(0);
11803 if (N00.getOpcode() == PreferredFusedOpcode) {
11804 SDValue N002 = N00.getOperand(2);
11805 if (isContractableFMUL(N002) &&
11806 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11807 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11808 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11809 N00.getOperand(0)),
11810 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11811 N00.getOperand(1)),
11812 DAG.getNode(PreferredFusedOpcode, SL, VT,
11813 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11814 N002.getOperand(0)),
11815 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11816 N002.getOperand(1)),
11817 DAG.getNode(ISD::FNEG, SL, VT,
11818 N1), Flags), Flags);
11819 }
11820 }
11821 }
11822
11823 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11824 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11825 if (N1.getOpcode() == PreferredFusedOpcode &&
11826 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11827 SDValue N120 = N1.getOperand(2).getOperand(0);
11828 if (isContractableFMUL(N120) &&
11829 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11830 SDValue N1200 = N120.getOperand(0);
11831 SDValue N1201 = N120.getOperand(1);
11832 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11833 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11834 N1.getOperand(1),
11835 DAG.getNode(PreferredFusedOpcode, SL, VT,
11836 DAG.getNode(ISD::FNEG, SL, VT,
11837 DAG.getNode(ISD::FP_EXTEND, SL,
11838 VT, N1200)),
11839 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11840 N1201),
11841 N0, Flags), Flags);
11842 }
11843 }
11844
11845 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11846 // -> (fma (fneg (fpext y)), (fpext z),
11847 // (fma (fneg (fpext u)), (fpext v), x))
11848 // FIXME: This turns two single-precision and one double-precision
11849 // operation into two double-precision operations, which might not be
11850 // interesting for all targets, especially GPUs.
11851 if (N1.getOpcode() == ISD::FP_EXTEND &&
11852 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11853 SDValue CvtSrc = N1.getOperand(0);
11854 SDValue N100 = CvtSrc.getOperand(0);
11855 SDValue N101 = CvtSrc.getOperand(1);
11856 SDValue N102 = CvtSrc.getOperand(2);
11857 if (isContractableFMUL(N102) &&
11858 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11859 SDValue N1020 = N102.getOperand(0);
11860 SDValue N1021 = N102.getOperand(1);
11861 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11862 DAG.getNode(ISD::FNEG, SL, VT,
11863 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11864 N100)),
11865 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11866 DAG.getNode(PreferredFusedOpcode, SL, VT,
11867 DAG.getNode(ISD::FNEG, SL, VT,
11868 DAG.getNode(ISD::FP_EXTEND, SL,
11869 VT, N1020)),
11870 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11871 N1021),
11872 N0, Flags), Flags);
11873 }
11874 }
11875 }
11876
11877 return SDValue();
11878}
11879
11880/// Try to perform FMA combining on a given FMUL node based on the distributive
11881/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11882/// subtraction instead of addition).
11883SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11884 SDValue N0 = N->getOperand(0);
11885 SDValue N1 = N->getOperand(1);
11886 EVT VT = N->getValueType(0);
11887 SDLoc SL(N);
11888 const SDNodeFlags Flags = N->getFlags();
11889
11890 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11891
11892 const TargetOptions &Options = DAG.getTarget().Options;
11893
11894 // The transforms below are incorrect when x == 0 and y == inf, because the
11895 // intermediate multiplication produces a nan.
11896 if (!Options.NoInfsFPMath)
11897 return SDValue();
11898
11899 // Floating-point multiply-add without intermediate rounding.
11900 bool HasFMA =
11901 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11903 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11904
11905 // Floating-point multiply-add with intermediate rounding. This can result
11906 // in a less precise result due to the changed rounding order.
11907 bool HasFMAD = Options.UnsafeFPMath &&
11908 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11909
11910 // No valid opcode, do not combine.
11911 if (!HasFMAD && !HasFMA)
11912 return SDValue();
11913
11914 // Always prefer FMAD to FMA for precision.
11915 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11917
11918 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11919 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11920 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11921 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11922 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11923 if (C->isExactlyValue(+1.0))
11924 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11925 Y, Flags);
11926 if (C->isExactlyValue(-1.0))
11927 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11928 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11929 }
11930 }
11931 return SDValue();
11932 };
11933
11934 if (SDValue FMA = FuseFADD(N0, N1, Flags))
11935 return FMA;
11936 if (SDValue FMA = FuseFADD(N1, N0, Flags))
11937 return FMA;
11938
11939 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11940 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11941 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11942 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11943 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11944 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11945 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11946 if (C0->isExactlyValue(+1.0))
11947 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11948 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11949 Y, Flags);
11950 if (C0->isExactlyValue(-1.0))
11951 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11952 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11953 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11954 }
11955 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11956 if (C1->isExactlyValue(+1.0))
11957 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11958 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11959 if (C1->isExactlyValue(-1.0))
11960 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11961 Y, Flags);
11962 }
11963 }
11964 return SDValue();
11965 };
11966
11967 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11968 return FMA;
11969 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11970 return FMA;
11971
11972 return SDValue();
11973}
11974
// Combine an ISD::FADD node. The folds below are tried in the order written
// and the first one that applies supplies the return value; an empty SDValue
// is returned when nothing matches.
//
// NOTE(review): this listing carries an embedded line number on every line
// and several lines are absent (the numbering skips 12064, 12074-12075,
// 12095-12096, 12116 and 12126). Restore them from the upstream file before
// relying on this text.
11975SDValue DAGCombiner::visitFADD(SDNode *N) {
11976 SDValue N0 = N->getOperand(0);
11977 SDValue N1 = N->getOperand(1);
11978 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11979 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11980 EVT VT = N->getValueType(0);
11981 SDLoc DL(N);
11982 const TargetOptions &Options = DAG.getTarget().Options;
11983 const SDNodeFlags Flags = N->getFlags();
11984
11985 // fold vector ops
11986 if (VT.isVector())
11987 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11988 return FoldedVOp;
11989
11990 // fold (fadd c1, c2) -> c1 + c2
11991 if (N0CFP && N1CFP)
11992 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11993
11994 // canonicalize constant to RHS
11995 if (N0CFP && !N1CFP)
11996 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11997
11998 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11999 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12000 if (N1C && N1C->isZero())
12001 if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
12002 return N0;
12003
12004 if (SDValue NewSel = foldBinOpIntoSelect(N))
12005 return NewSel;
12006
12007 // fold (fadd A, (fneg B)) -> (fsub A, B)
// Only taken when isNegatibleForFree reports exactly 2 for the operand, and
// (after operation legalization) only if FSUB is legal or custom for VT.
12008 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12009 isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12010 return DAG.getNode(ISD::FSUB, DL, VT, N0,
12011 GetNegatedExpression(N1, DAG, LegalOperations,
12012 ForCodeSize), Flags);
12013
12014 // fold (fadd (fneg A), B) -> (fsub B, A)
12015 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12016 isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12017 return DAG.getNode(ISD::FSUB, DL, VT, N1,
12018 GetNegatedExpression(N0, DAG, LegalOperations,
12019 ForCodeSize), Flags);
12020
// True for a single-use FMUL whose second operand is the constant (or splat
// constant) -2.0.
12021 auto isFMulNegTwo = [](SDValue FMul) {
12022 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12023 return false;
12024 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12025 return C && C->isExactlyValue(-2.0);
12026 };
12027
12028 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12029 if (isFMulNegTwo(N0)) {
12030 SDValue B = N0.getOperand(0);
12031 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12032 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12033 }
12034 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12035 if (isFMulNegTwo(N1)) {
12036 SDValue B = N1.getOperand(0);
12037 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12038 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12039 }
12040
12041 // No FP constant should be created after legalization as Instruction
12042 // Selection pass has a hard time dealing with FP constants.
12043 bool AllowNewConst = (Level < AfterLegalizeDAG);
12044
12045 // If nnan is enabled, fold lots of things.
12046 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12047 // If allowed, fold (fadd (fneg x), x) -> 0.0
12048 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12049 return DAG.getConstantFP(0.0, DL, VT);
12050
12051 // If allowed, fold (fadd x, (fneg x)) -> 0.0
12052 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12053 return DAG.getConstantFP(0.0, DL, VT);
12054 }
12055
12056 // If 'unsafe math' or reassoc and nsz, fold lots of things.
12057 // TODO: break out portions of the transformations below for which Unsafe is
12058 // considered and which do not require both nsz and reassoc
12059 if ((Options.UnsafeFPMath ||
12060 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12061 AllowNewConst) {
12062 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12063 if (N1CFP && N0.getOpcode() == ISD::FADD &&
// NOTE(review): the remainder of this condition (listing line 12064) is
// missing here; presumably it tests that N0's second operand is a constant --
// confirm against upstream.
12065 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12066 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12067 }
12068
12069 // We can fold chains of FADD's of the same value into multiplications.
12070 // This transform is not safe in general because we are reducing the number
12071 // of rounding steps.
12072 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12073 if (N0.getOpcode() == ISD::FMUL) {
// NOTE(review): the declarations of CFP00 / CFP01 (listing lines 12074-12075)
// are missing from this listing.
12076
12077 // (fadd (fmul x, c), x) -> (fmul x, c+1)
12078 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12079 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12080 DAG.getConstantFP(1.0, DL, VT), Flags);
12081 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12082 }
12083
12084 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12085 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12086 N1.getOperand(0) == N1.getOperand(1) &&
12087 N0.getOperand(0) == N1.getOperand(0)) {
12088 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12089 DAG.getConstantFP(2.0, DL, VT), Flags);
12090 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12091 }
12092 }
12093
12094 if (N1.getOpcode() == ISD::FMUL) {
// NOTE(review): the declarations of CFP10 / CFP11 (listing lines 12095-12096)
// are missing from this listing.
12097
12098 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12099 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12100 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12101 DAG.getConstantFP(1.0, DL, VT), Flags);
12102 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12103 }
12104
12105 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12106 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12107 N0.getOperand(0) == N0.getOperand(1) &&
12108 N1.getOperand(0) == N0.getOperand(0)) {
12109 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12110 DAG.getConstantFP(2.0, DL, VT), Flags);
12111 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12112 }
12113 }
12114
12115 if (N0.getOpcode() == ISD::FADD) {
// NOTE(review): listing line 12116 (the declaration of the CFP00 used below)
// is missing from this listing.
12117 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12118 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12119 (N0.getOperand(0) == N1)) {
12120 return DAG.getNode(ISD::FMUL, DL, VT,
12121 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12122 }
12123 }
12124
12125 if (N1.getOpcode() == ISD::FADD) {
// NOTE(review): listing line 12126 (the declaration of the CFP10 used below)
// is missing from this listing.
12127 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12128 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12129 N1.getOperand(0) == N0) {
12130 return DAG.getNode(ISD::FMUL, DL, VT,
12131 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12132 }
12133 }
12134
12135 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12136 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12137 N0.getOperand(0) == N0.getOperand(1) &&
12138 N1.getOperand(0) == N1.getOperand(1) &&
12139 N0.getOperand(0) == N1.getOperand(0)) {
12140 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12141 DAG.getConstantFP(4.0, DL, VT), Flags);
12142 }
12143 }
12144 } // enable-unsafe-fp-math
12145
12146 // FADD -> FMA combines:
// The fused node is queued on the worklist before being returned.
12147 if (SDValue Fused = visitFADDForFMACombine(N)) {
12148 AddToWorklist(Fused.getNode());
12149 return Fused;
12150 }
12151 return SDValue();
12152}
12153
12154SDValue DAGCombiner::visitFSUB(SDNode *N) {
12155 SDValue N0 = N->getOperand(0);
12156 SDValue N1 = N->getOperand(1);
12157 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12158 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12159 EVT VT = N->getValueType(0);
12160 SDLoc DL(N);
12161 const TargetOptions &Options = DAG.getTarget().Options;
12162 const SDNodeFlags Flags = N->getFlags();
12163
12164 // fold vector ops
12165 if (VT.isVector())
12166 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12167 return FoldedVOp;
12168
12169 // fold (fsub c1, c2) -> c1-c2
12170 if (N0CFP && N1CFP)
12171 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12172
12173 if (SDValue NewSel = foldBinOpIntoSelect(N))
12174 return NewSel;
12175
12176 // (fsub A, 0) -> A
12177 if (N1CFP && N1CFP->isZero()) {
12178 if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
12179 Flags.hasNoSignedZeros()) {
12180 return N0;
12181 }
12182 }
12183
12184 if (N0 == N1) {
12185 // (fsub x, x) -> 0.0
12186 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12187 return DAG.getConstantFP(0.0f, DL, VT);
12188 }
12189
12190 // (fsub -0.0, N1) -> -N1
12191 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12192 // FSUB does not specify the sign bit of a NaN. Also note that for
12193 // the same reason, the inverse transform is not safe, unless fast math
12194 // flags are in play.
12195 if (N0CFP && N0CFP->isZero()) {
12196 if (N0CFP->isNegative() ||
12197 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12198 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12199 return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12200 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12201 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12202 }
12203 }
12204
12205 if ((Options.UnsafeFPMath ||
12206 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
12207 && N1.getOpcode() == ISD::FADD) {
12208 // X - (X + Y) -> -Y
12209 if (N0 == N1->getOperand(0))
12210 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12211 // X - (Y + X) -> -Y
12212 if (N0 == N1->getOperand(1))
12213 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12214 }
12215
12216 // fold (fsub A, (fneg B)) -> (fadd A, B)
12217 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12218 return DAG.getNode(ISD::FADD, DL, VT, N0,
12219 GetNegatedExpression(N1, DAG, LegalOperations,
12220 ForCodeSize), Flags);
12221
12222 // FSUB -> FMA combines:
12223 if (SDValue Fused = visitFSUBForFMACombine(N)) {
12224 AddToWorklist(Fused.getNode());
12225 return Fused;
12226 }
12227
12228 return SDValue();
12229}
12230
// Combine an ISD::FMUL node. The folds below are tried in order; the first
// that applies supplies the return value and an empty SDValue means no fold
// matched.
//
// NOTE(review): this listing carries an embedded line number on every line
// and several lines are absent (the numbering skips 12253-12254, 12273,
// 12279-12280 and 12349). Restore them from the upstream file before relying
// on this text.
12231SDValue DAGCombiner::visitFMUL(SDNode *N) {
12232 SDValue N0 = N->getOperand(0);
12233 SDValue N1 = N->getOperand(1);
12234 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12235 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12236 EVT VT = N->getValueType(0);
12237 SDLoc DL(N);
12238 const TargetOptions &Options = DAG.getTarget().Options;
12239 const SDNodeFlags Flags = N->getFlags();
12240
12241 // fold vector ops
12242 if (VT.isVector()) {
12243 // This just handles C1 * C2 for vectors. Other vector folds are below.
12244 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12245 return FoldedVOp;
12246 }
12247
12248 // fold (fmul c1, c2) -> c1*c2
12249 if (N0CFP && N1CFP)
12250 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12251
12252 // canonicalize constant to RHS
// NOTE(review): the guarding condition (listing lines 12253-12254) is missing
// here; as shown, the return below would be unconditional.
12255 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12256
12257 // fold (fmul A, 1.0) -> A
12258 if (N1CFP && N1CFP->isExactlyValue(1.0))
12259 return N0;
12260
12261 if (SDValue NewSel = foldBinOpIntoSelect(N))
12262 return NewSel;
12263
// Requires both nnan and nsz, either from the global options or from the
// node's own flags.
12264 if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12265 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12266 // fold (fmul A, 0) -> 0
12267 if (N1CFP && N1CFP->isZero())
12268 return N1;
12269 }
12270
12271 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12272 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
// NOTE(review): the opening of this `if` (listing line 12273) is missing.
12274 N0.getOpcode() == ISD::FMUL) {
12275 SDValue N00 = N0.getOperand(0);
12276 SDValue N01 = N0.getOperand(1);
12277 // Avoid an infinite loop by making sure that N00 is not a constant
12278 // (the inner multiply has not been constant folded yet).
// NOTE(review): the check described above (listing lines 12279-12280) is
// missing from this listing.
12281 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12282 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12283 }
12284 }
12285
12286 // Match a special-case: we convert X * 2.0 into fadd.
12287 // fmul (fadd X, X), C -> fmul X, 2.0 * C
12288 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12289 N0.getOperand(0) == N0.getOperand(1)) {
12290 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12291 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12292 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12293 }
12294 }
12295
12296 // fold (fmul X, 2.0) -> (fadd X, X)
12297 if (N1CFP && N1CFP->isExactlyValue(+2.0))
12298 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12299
12300 // fold (fmul X, -1.0) -> (fneg X)
12301 if (N1CFP && N1CFP->isExactlyValue(-1.0))
12302 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12303 return DAG.getNode(ISD::FNEG, DL, VT, N0);
12304
12305 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
12306 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12307 ForCodeSize)) {
12308 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12309 ForCodeSize)) {
12310 // Both can be negated for free, check to see if at least one is cheaper
12311 // negated.
12312 if (LHSNeg == 2 || RHSNeg == 2)
12313 return DAG.getNode(ISD::FMUL, DL, VT,
12314 GetNegatedExpression(N0, DAG, LegalOperations,
12315 ForCodeSize),
12316 GetNegatedExpression(N1, DAG, LegalOperations,
12317 ForCodeSize),
12318 Flags);
12319 }
12320 }
12321
12322 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12323 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
// Only the node's own nnan/nsz flags are consulted here, and FABS must be
// legal for VT.
12324 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12325 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12326 TLI.isOperationLegal(ISD::FABS, VT)) {
// Normalise so that Select holds the SELECT operand and X the other one.
12327 SDValue Select = N0, X = N1;
12328 if (Select.getOpcode() != ISD::SELECT)
12329 std::swap(Select, X);
12330
12331 SDValue Cond = Select.getOperand(0);
12332 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12333 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12334
// The condition must be a SETCC comparing X itself against the FP constant
// 0.0, and both select arms must be FP constants.
12335 if (TrueOpnd && FalseOpnd &&
12336 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12337 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12338 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12339 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12340 switch (CC) {
12341 default: break;
12342 case ISD::SETOLT:
12343 case ISD::SETULT:
12344 case ISD::SETOLE:
12345 case ISD::SETULE:
12346 case ISD::SETLT:
12347 case ISD::SETLE:
// For less-than style predicates the arms are swapped so the greater-than
// handling below can be shared.
12348 std::swap(TrueOpnd, FalseOpnd);
// NOTE(review): there is no break here, so control falls through into the
// cases below. The listing skips line 12349 at this point, which may have
// held a fallthrough annotation -- confirm against upstream.
12350 case ISD::SETOGT:
12351 case ISD::SETUGT:
12352 case ISD::SETOGE:
12353 case ISD::SETUGE:
12354 case ISD::SETGT:
12355 case ISD::SETGE:
12356 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12357 TLI.isOperationLegal(ISD::FNEG, VT))
12358 return DAG.getNode(ISD::FNEG, DL, VT,
12359 DAG.getNode(ISD::FABS, DL, VT, X));
12360 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12361 return DAG.getNode(ISD::FABS, DL, VT, X);
12362
12363 break;
12364 }
12365 }
12366 }
12367
12368 // FMUL -> FMA combines:
// The fused node is queued on the worklist before being returned.
12369 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12370 AddToWorklist(Fused.getNode());
12371 return Fused;
12372 }
12373
12374 return SDValue();
12375}
12376
// Combine an ISD::FMA node (operands N0, N1, N2). The folds below are tried
// in order; an empty SDValue means no fold matched.
//
// NOTE(review): this listing carries an embedded line number on every line
// and several lines are absent (the numbering skips 12411-12412, 12418-12419,
// 12427-12428 and 12454). Restore them from the upstream file before relying
// on this text.
12377SDValue DAGCombiner::visitFMA(SDNode *N) {
12378 SDValue N0 = N->getOperand(0);
12379 SDValue N1 = N->getOperand(1);
12380 SDValue N2 = N->getOperand(2);
// Unlike the FADD/FSUB/FMUL visitors, only scalar ConstantFPSDNode operands
// are recognised here (plain dyn_cast, no splat matching).
12381 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12382 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12383 EVT VT = N->getValueType(0);
12384 SDLoc DL(N);
12385 const TargetOptions &Options = DAG.getTarget().Options;
12386
12387 // FMA nodes have flags that propagate to the created nodes.
12388 const SDNodeFlags Flags = N->getFlags();
// Note: this local is also true when isContractable(N) holds, not only under
// the global unsafe-fp-math option.
12389 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12390
12391 // Constant fold FMA.
12392 if (isa<ConstantFPSDNode>(N0) &&
12393 isa<ConstantFPSDNode>(N1) &&
12394 isa<ConstantFPSDNode>(N2)) {
12395 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12396 }
12397
// (fma 0, x, y) -> y and (fma x, 0, y) -> y
12398 if (UnsafeFPMath) {
12399 if (N0CFP && N0CFP->isZero())
12400 return N2;
12401 if (N1CFP && N1CFP->isZero())
12402 return N2;
12403 }
12404 // TODO: The FMA node should have flags that propagate to these nodes.
// (fma 1, x, y) -> (fadd x, y) and (fma x, 1, y) -> (fadd x, y)
12405 if (N0CFP && N0CFP->isExactlyValue(1.0))
12406 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12407 if (N1CFP && N1CFP->isExactlyValue(1.0))
12408 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12409
12410 // Canonicalize (fma c, x, y) -> (fma x, c, y)
// NOTE(review): the guarding condition (listing lines 12411-12412) is missing
// here; as shown, the return below would be unconditional.
12413 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12414
12415 if (UnsafeFPMath) {
12416 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12417 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
// NOTE(review): the rest of this condition (listing lines 12418-12419) is
// missing from this listing.
12420 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12421 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12422 Flags), Flags);
12423 }
12424
12425 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12426 if (N0.getOpcode() == ISD::FMUL &&
// NOTE(review): the rest of this condition (listing lines 12427-12428) is
// missing from this listing.
12429 return DAG.getNode(ISD::FMA, DL, VT,
12430 N0.getOperand(0),
12431 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12432 Flags),
12433 N2);
12434 }
12435 }
12436
12437 // (fma x, 1, y) -> (fadd x, y)
12438 // (fma x, -1, y) -> (fadd (fneg x), y)
12439 if (N1CFP) {
// NOTE(review): N1CFP == 1.0 was already handled by an earlier return in this
// function, so this check looks redundant in the visible code -- confirm.
12440 if (N1CFP->isExactlyValue(1.0))
12441 // TODO: The FMA node should have flags that propagate to this node.
12442 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12443
12444 if (N1CFP->isExactlyValue(-1.0) &&
12445 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12446 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12447 AddToWorklist(RHSNeg.getNode());
12448 // TODO: The FMA node should have flags that propagate to this node.
12449 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12450 }
12451
12452 // fma (fneg x), K, y -> fma x, -K, y
12453 if (N0.getOpcode() == ISD::FNEG &&
// NOTE(review): part of this condition (listing line 12454) is missing; the
// surviving tail requires N1 to have one use and its value not to be a legal
// FP immediate.
12455 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12456 ForCodeSize)))) {
12457 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12458 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12459 }
12460 }
12461
12462 if (UnsafeFPMath) {
12463 // (fma x, c, x) -> (fmul x, (c+1))
12464 if (N1CFP && N0 == N2) {
12465 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12466 DAG.getNode(ISD::FADD, DL, VT, N1,
12467 DAG.getConstantFP(1.0, DL, VT), Flags),
12468 Flags);
12469 }
12470
12471 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12472 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12473 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12474 DAG.getNode(ISD::FADD, DL, VT, N1,
12475 DAG.getConstantFP(-1.0, DL, VT), Flags),
12476 Flags);
12477 }
12478 }
12479
12480 return SDValue();
12481}
12482
12483// Combine multiple FDIVs with the same divisor into multiple FMULs by the
12484// reciprocal.
12485// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12486// Notice that this is not always beneficial. One reason is different targets
12487// may have different costs for FDIV and FMUL, so sometimes the cost of two
12488// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12489// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
//
// Returns an empty SDValue when the transform is not performed; otherwise
// every eligible FDIV user of the divisor is replaced via CombineTo and
// SDValue(N, 0) is returned.
//
// NOTE(review): the function signature (listing line 12490) and the
// declaration of `Users` (listing line 12522) are missing from this listing.
12491 // TODO: Limit this transform based on optsize/minsize - it always creates at
12492 // least 1 extra instruction. But the perf win may be substantial enough
12493 // that only minsize should restrict this.
12494 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12495 const SDNodeFlags Flags = N->getFlags();
12496 if (!UnsafeMath && !Flags.hasAllowReciprocal())
12497 return SDValue();
12498
12499 // Skip if current node is a reciprocal/fneg-reciprocal.
12500 SDValue N0 = N->getOperand(0);
12501 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12502 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12503 return SDValue();
12504
12505 // Exit early if the target does not want this transform or if there can't
12506 // possibly be enough uses of the divisor to make the transform worthwhile.
12507 SDValue N1 = N->getOperand(1);
// A MinUses of zero is treated below as "do not perform this transform".
12508 unsigned MinUses = TLI.combineRepeatedFPDivisors();
12509
12510 // For splat vectors, scale the number of uses by the splat factor. If we can
12511 // convert the division into a scalar op, that will likely be much faster.
12512 unsigned NumElts = 1;
12513 EVT VT = N->getValueType(0);
12514 if (VT.isVector() && DAG.isSplatValue(N1))
12515 NumElts = VT.getVectorNumElements();
12516
12517 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12518 return SDValue();
12519
12520 // Find all FDIV users of the same divisor.
12521 // Use a set because duplicates may be present in the user list.
12523 for (auto *U : N1->uses()) {
12524 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12525 // This division is eligible for optimization only if global unsafe math
12526 // is enabled or if this division allows reciprocal formation.
12527 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12528 Users.insert(U);
12529 }
12530 }
12531
12532 // Now that we have the actual number of divisor uses, make sure it meets
12533 // the minimum threshold specified by the target.
12534 if ((Users.size() * NumElts) < MinUses)
12535 return SDValue();
12536
12537 SDLoc DL(N);
12538 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
// The single shared reciprocal 1.0 / N1, built with the flags of N.
12539 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12540
12541 // Dividend / Divisor -> Dividend * Reciprocal
12542 for (auto *U : Users) {
12543 SDValue Dividend = U->getOperand(0);
12544 if (Dividend != FPOne) {
// Note: the multiply is created with the flags of N, not those of the user U.
12545 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12546 Reciprocal, Flags);
12547 CombineTo(U, NewNode);
12548 } else if (U != Reciprocal.getNode()) {
12549 // In the absence of fast-math-flags, this user node is always the
12550 // same node as Reciprocal, but with FMF they may be different nodes.
12551 CombineTo(U, Reciprocal);
12552 }
12553 }
12554 return SDValue(N, 0); // N was replaced.
12555}
12556
// Combine an ISD::FDIV node. The folds below are tried in order; an empty
// SDValue means no fold matched.
//
// NOTE(review): this listing carries an embedded line number on every line
// and two lines are absent (the numbering skips 12579 and 12596). Restore
// them from the upstream file before relying on this text.
12557SDValue DAGCombiner::visitFDIV(SDNode *N) {
12558 SDValue N0 = N->getOperand(0);
12559 SDValue N1 = N->getOperand(1);
// Only scalar ConstantFPSDNode operands are recognised here (plain dyn_cast).
12560 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12561 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12562 EVT VT = N->getValueType(0);
12563 SDLoc DL(N);
12564 const TargetOptions &Options = DAG.getTarget().Options;
12565 SDNodeFlags Flags = N->getFlags();
12566
12567 // fold vector ops
12568 if (VT.isVector())
12569 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12570 return FoldedVOp;
12571
12572 // fold (fdiv c1, c2) -> c1/c2
12573 if (N0CFP && N1CFP)
12574 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12575
12576 if (SDValue NewSel = foldBinOpIntoSelect(N))
12577 return NewSel;
12578
// NOTE(review): the statement that defines V and guards this return (listing
// line 12579) is missing from this listing.
12580 return V;
12581
12582 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12583 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12584 if (N1CFP) {
12585 // Compute the reciprocal 1.0 / c2.
12586 const APFloat &N1APF = N1CFP->getValueAPF();
12587 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12588 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12589 // Only do the transform if the reciprocal is a legal fp immediate that
12590 // isn't too nasty (eg NaN, denormal, ...).
12591 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12592 (!LegalOperations ||
12593 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12594 // backend)... we should handle this gracefully after Legalize.
12595 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
// NOTE(review): one line of this condition (listing line 12596) is missing
// from this listing.
12597 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12598 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12599 DAG.getConstantFP(Recip, DL, VT), Flags);
12600 }
12601
12602 // If this FDIV is part of a reciprocal square root, it may be folded
12603 // into a target-specific square root estimate instruction.
12604 if (N1.getOpcode() == ISD::FSQRT) {
12605 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
12606 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12607 }
12608 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12609 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
// The estimate is built on the FSQRT operand and then re-extended to VT.
12610 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12611 Flags)) {
12612 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12613 AddToWorklist(RV.getNode());
12614 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12615 }
12616 } else if (N1.getOpcode() == ISD::FP_ROUND &&
12617 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
// Same as above, but re-rounded to VT reusing the original FP_ROUND's second
// operand.
12618 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12619 Flags)) {
12620 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12621 AddToWorklist(RV.getNode());
12622 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12623 }
12624 } else if (N1.getOpcode() == ISD::FMUL) {
12625 // Look through an FMUL. Even though this won't remove the FDIV directly,
12626 // it's still worthwhile to get rid of the FSQRT if possible.
12627 SDValue SqrtOp;
12628 SDValue OtherOp;
12629 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12630 SqrtOp = N1.getOperand(0);
12631 OtherOp = N1.getOperand(1);
12632 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12633 SqrtOp = N1.getOperand(1);
12634 OtherOp = N1.getOperand(0);
12635 }
12636 if (SqrtOp.getNode()) {
12637 // We found a FSQRT, so try to make this fold:
12638 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12639 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12640 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12641 AddToWorklist(RV.getNode());
12642 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12643 }
12644 }
12645 }
12646
12647 // Fold into a reciprocal estimate and multiply instead of a real divide.
12648 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12649 AddToWorklist(RV.getNode());
12650 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12651 }
12652 }
12653
12654 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12655 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12656 ForCodeSize)) {
12657 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12658 ForCodeSize)) {
12659 // Both can be negated for free, check to see if at least one is cheaper
12660 // negated.
12661 if (LHSNeg == 2 || RHSNeg == 2)
12662 return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12663 GetNegatedExpression(N0, DAG, LegalOperations,
12664 ForCodeSize),
12665 GetNegatedExpression(N1, DAG, LegalOperations,
12666 ForCodeSize),
12667 Flags);
12668 }
12669 }
12670
12671 return SDValue();
12672}
12673
12674SDValue DAGCombiner::visitFREM(SDNode *N) {
12675 SDValue N0 = N->getOperand(0);
12676 SDValue N1 = N->getOperand(1);
12677 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12678 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12679 EVT VT = N->getValueType(0);
12680
12681 // fold (frem c1, c2) -> fmod(c1,c2)
12682 if (N0CFP && N1CFP)
12683 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12684
12685 if (SDValue NewSel = foldBinOpIntoSelect(N))
12686 return NewSel;
12687
12688 return SDValue();
12689}
12690
12691SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12692 SDNodeFlags Flags = N->getFlags();
12693 if (!DAG.getTarget().Options.UnsafeFPMath &&
12694 !Flags.hasApproximateFuncs())
12695 return SDValue();
12696
12697 SDValue N0 = N->getOperand(0);
12698 if (TLI.isFsqrtCheap(N0, DAG))
12699 return SDValue();
12700
12701 // FSQRT nodes have flags that propagate to the created nodes.
12702 return buildSqrtEstimate(N0, Flags);
12703}
12704
12705/// copysign(x, fp_extend(y)) -> copysign(x, y)
12706/// copysign(x, fp_round(y)) -> copysign(x, y)
///
/// Returns true when operand 1 of N is an FP_EXTEND or FP_ROUND that may be
/// looked through, i.e. the conversion does not change the type or its source
/// type is not f128; returns false otherwise.
///
/// NOTE(review): the function signature (listing line 12707) is missing from
/// this listing.
12708 SDValue N1 = N->getOperand(1);
12709 if ((N1.getOpcode() == ISD::FP_EXTEND ||
12710 N1.getOpcode() == ISD::FP_ROUND)) {
12711 // Do not optimize out type conversion of f128 type yet.
12712 // For some targets like x86_64, configuration is changed to keep one f128
12713 // value in one SSE register, but instruction selection cannot handle
12714 // FCOPYSIGN on SSE registers yet.
12715 EVT N1VT = N1->getValueType(0);
12716 EVT N1Op0VT = N1->getOperand(0).getValueType();
12717 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12718 }
12719 return false;
12720}
12721
// Combine an ISD::FCOPYSIGN node (N0 supplies the magnitude, N1 the sign).
// The folds below are tried in order; an empty SDValue means no fold matched.
//
// NOTE(review): this listing carries an embedded line number on every line
// and one line is absent (the numbering skips 12763). Restore it from the
// upstream file before relying on this text.
12722SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12723 SDValue N0 = N->getOperand(0);
12724 SDValue N1 = N->getOperand(1);
12725 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12726 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12727 EVT VT = N->getValueType(0);
12728
12729 if (N0CFP && N1CFP) // Constant fold
12730 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12731
12732 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12733 const APFloat &V = N1C->getValueAPF();
12734 // copysign(x, c1) -> fabs(x) iff ispos(c1)
12735 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12736 if (!V.isNegative()) {
12737 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12738 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12739 } else {
// Note: only FNEG legality is checked on this path, although an FABS node is
// created as well.
12740 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12741 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12742 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12743 }
12744 }
12745
12746 // copysign(fabs(x), y) -> copysign(x, y)
12747 // copysign(fneg(x), y) -> copysign(x, y)
12748 // copysign(copysign(x,z), y) -> copysign(x, y)
12749 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12750 N0.getOpcode() == ISD::FCOPYSIGN)
12751 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12752
12753 // copysign(x, abs(y)) -> abs(x)
12754 if (N1.getOpcode() == ISD::FABS)
12755 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12756
12757 // copysign(x, copysign(y,z)) -> copysign(x, z)
12758 if (N1.getOpcode() == ISD::FCOPYSIGN)
12759 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12760
12761 // copysign(x, fp_extend(y)) -> copysign(x, y)
12762 // copysign(x, fp_round(y)) -> copysign(x, y)
// NOTE(review): the guarding condition (listing line 12763) is missing here;
// as shown, the return below would be unconditional.
12764 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12765
12766 return SDValue();
12767}
12768
// Combine an ISD::FPOW node whose exponent is a constant (or splat constant):
// an exponent of 1/3 may become FCBRT, and exponents of 0.25 / 0.75 may
// become square-root sequences. Returns an empty SDValue when nothing
// applies.
//
// NOTE(review): this listing carries an embedded line number on every line
// and several lines are absent (the numbering skips 12795-12796 and 12824).
// Restore them from the upstream file before relying on this text.
12769SDValue DAGCombiner::visitFPOW(SDNode *N) {
12770 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12771 if (!ExponentC)
12772 return SDValue();
12773
12774 // Try to convert x ** (1/3) into cube root.
12775 // TODO: Handle the various flavors of long double.
12776 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12777 // Some range near 1/3 should be fine.
12778 EVT VT = N->getValueType(0);
// The exponent is compared against 1/3 evaluated in the matching precision
// (float for f32, double for f64).
12779 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12780 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12781 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12782 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12783 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
12784 // For regular numbers, rounding may cause the results to differ.
12785 // Therefore, we require { nsz ninf nnan afn } for this transform.
12786 // TODO: We could select out the special cases if we don't have nsz/ninf.
12787 SDNodeFlags Flags = N->getFlags();
12788 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12789 !Flags.hasApproximateFuncs())
12790 return SDValue();
12791
12792 // Do not create a cbrt() libcall if the target does not have it, and do not
12793 // turn a pow that has lowering support into a cbrt() libcall.
12794 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
// NOTE(review): the rest of this condition (listing lines 12795-12796) is
// missing from this listing.
12797 return SDValue();
12798
12799 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12800 }
12801
12802 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12803 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12804 // TODO: This could be extended (using a target hook) to handle smaller
12805 // power-of-2 fractional exponents.
12806 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12807 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12808 if (ExponentIs025 || ExponentIs075) {
12809 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12810 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
12811 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12812 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
12813 // For regular numbers, rounding may cause the results to differ.
12814 // Therefore, we require { nsz ninf afn } for this transform.
12815 // TODO: We could select out the special cases if we don't have nsz/ninf.
12816 SDNodeFlags Flags = N->getFlags();
12817
12818 // We only need no signed zeros for the 0.25 case.
12819 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12820 !Flags.hasApproximateFuncs())
12821 return SDValue();
12822
12823 // Don't double the number of libcalls. We are trying to inline fast code.
// NOTE(review): the condition guarding this return (listing line 12824) is
// missing from this listing.
12825 return SDValue();
12826
12827 // Assume that libcalls are the smallest code.
12828 // TODO: This restriction should probably be lifted for vectors.
12829 if (DAG.getMachineFunction().getFunction().hasOptSize())
12830 return SDValue();
12831
12832 // pow(X, 0.25) --> sqrt(sqrt(X))
12833 SDLoc DL(N);
12834 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12835 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12836 if (ExponentIs025)
12837 return SqrtSqrt;
12838 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12839 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12840 }
12841
12842 return SDValue();
12843}
12844
// Fold an FP -> integer -> FP round trip of the same FP type into a single
// FTRUNC: [us]itofp (fpto[us]i X) --> ftrunc X. Returns an empty SDValue when
// the fold is not applicable or not permitted.
//
// NOTE(review): the first line of the signature (listing line 12845) and one
// line of the legality check (listing line 12863) are missing from this
// listing.
12846 const TargetLowering &TLI) {
12847 // This optimization is guarded by a function attribute because it may produce
12848 // unexpected results. Ie, programs may be relying on the platform-specific
12849 // undefined behavior when the float-to-int conversion overflows.
12850 const Function &F = DAG.getMachineFunction().getFunction();
12851 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
// The fold is skipped only when the attribute's string value is exactly
// "false".
12852 if (StrictOverflow.getValueAsString().equals("false"))
12853 return SDValue();
12854
12855 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12856 // replacing casts with a libcall. We also must be allowed to ignore -0.0
12857 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12858 // conversions would return +0.0.
12859 // FIXME: We should be able to use node-level FMF here.
12860 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12861 EVT VT = N->getValueType(0);
12862 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
// NOTE(review): the second half of this condition (listing line 12863) is
// missing; per the comment above it presumably checks that signed zeros may
// be ignored -- confirm against upstream.
12864 return SDValue();
12865
12866 // fptosi/fptoui round towards zero, so converting from FP to integer and
12867 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12868 SDValue N0 = N->getOperand(0);
// Signed and unsigned conversions must pair up, and the innermost value must
// already have the result type VT.
12869 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12870 N0.getOperand(0).getValueType() == VT)
12871 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12872
12873 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12874 N0.getOperand(0).getValueType() == VT)
12875 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12876
12877 return SDValue();
12878}
12879
// Combine a SINT_TO_FP node. Tries, in order: folding an undef operand to
// 0.0, re-issuing the node for a constant operand, switching to UINT_TO_FP
// when the sign bit is known zero, turning a converted setcc into a
// SELECT_CC, and the fp->int->fp round-trip fold (foldFPToIntToFP).
// Returns the replacement value, or an empty SDValue if nothing applied.
12880SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12881 SDValue N0 = N->getOperand(0);
12882 EVT VT = N->getValueType(0);
12883 EVT OpVT = N0.getValueType();
12884
12885 // [us]itofp(undef) = 0, because the result value is bounded.
12886 if (N0.isUndef())
12887 return DAG.getConstantFP(0.0, SDLoc(N), VT);
12888
12889 // fold (sint_to_fp c1) -> c1fp
// NOTE(review): listing lines 12890 and 12893 are absent from this extract,
// so the opening and closing parts of this guard are not visible here.
12891 // ...but only if the target supports immediate floating-point values
12892 (!LegalOperations ||
12894 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12895
12896 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12897 // but UINT_TO_FP is legal on this target, try to convert.
12898 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12899 hasOperation(ISD::UINT_TO_FP, OpVT)) {
12900 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12901 if (DAG.SignBitIsZero(N0))
12902 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12903 }
12904
12905 // The next optimizations are desirable only if SELECT_CC can be lowered.
12906 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12907 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
// The true value is -1.0: the setcc is required to be i1 here, and a set
// i1 read as a signed integer is -1.
12908 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12909 !VT.isVector() &&
12910 (!LegalOperations ||
// NOTE(review): listing line 12911 (rest of this condition) is absent from
// this extract.
12912 SDLoc DL(N);
12913 SDValue Ops[] =
12914 { N0.getOperand(0), N0.getOperand(1),
12915 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12916 N0.getOperand(2) };
12917 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12918 }
12919
12920 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12921 // (select_cc x, y, 1.0, 0.0, cc)
// With the zero-extend in between, the true value becomes +1.0.
12922 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12923 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12924 (!LegalOperations ||
// NOTE(review): listing line 12925 (rest of this condition) is absent from
// this extract.
12926 SDLoc DL(N);
12927 SDValue Ops[] =
12928 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12929 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12930 N0.getOperand(0).getOperand(2) };
12931 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12932 }
12933 }
12934
// Last resort: sitofp (fptosi X) --> ftrunc X, if permitted.
12935 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12936 return FTrunc;
12937
12938 return SDValue();
12939}
12940
// Combine a UINT_TO_FP node. Mirrors visitSINT_TO_FP: folds an undef operand
// to 0.0, re-issues the node for a constant operand, switches to SINT_TO_FP
// when the sign bit is known zero, turns a converted setcc into a SELECT_CC,
// and finally tries the fp->int->fp round-trip fold.
// Returns the replacement value, or an empty SDValue if nothing applied.
12941SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12942 SDValue N0 = N->getOperand(0);
12943 EVT VT = N->getValueType(0);
12944 EVT OpVT = N0.getValueType();
12945
12946 // [us]itofp(undef) = 0, because the result value is bounded.
12947 if (N0.isUndef())
12948 return DAG.getConstantFP(0.0, SDLoc(N), VT);
12949
12950 // fold (uint_to_fp c1) -> c1fp
// NOTE(review): listing lines 12951 and 12954 are absent from this extract,
// so the opening and closing parts of this guard are not visible here.
12952 // ...but only if the target supports immediate floating-point values
12953 (!LegalOperations ||
12955 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12956
12957 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12958 // but SINT_TO_FP is legal on this target, try to convert.
12959 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12960 hasOperation(ISD::SINT_TO_FP, OpVT)) {
12961 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12962 if (DAG.SignBitIsZero(N0))
12963 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12964 }
12965
12966 // The next optimizations are desirable only if SELECT_CC can be lowered.
12967 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12968 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
// Unlike the signed variant, the true value built below is +1.0.
12969 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12970 (!LegalOperations ||
// NOTE(review): listing line 12971 (rest of this condition) is absent from
// this extract.
12972 SDLoc DL(N);
12973 SDValue Ops[] =
12974 { N0.getOperand(0), N0.getOperand(1),
12975 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12976 N0.getOperand(2) };
12977 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12978 }
12979 }
12980
// Last resort: uitofp (fptoui X) --> ftrunc X, if permitted.
12981 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12982 return FTrunc;
12983
12984 return SDValue();
12985}
12986
12987// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
// NOTE(review): the signature (listing line 12988) is absent from this
// extract. visitFP_TO_SINT / visitFP_TO_UINT call FoldIntToFPToInt(N, DAG),
// so this is presumably that helper -- confirm against the full file.
// Returns an empty SDValue when the operand is not an int-to-fp conversion or
// when the intermediate FP type cannot hold the value range exactly.
12989 SDValue N0 = N->getOperand(0);
12990 EVT VT = N->getValueType(0);
12991
12992 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12993 return SDValue();
12994
12995 SDValue Src = N0.getOperand(0);
12996 EVT SrcVT = Src.getValueType();
12997 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12998 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12999
13000 // We can safely assume the conversion won't overflow the output range,
13001 // because (for example) (uint8_t)18293.f is undefined behavior.
13002
13003 // Since we can assume the conversion won't overflow, our decision as to
13004 // whether the input will fit in the float should depend on the minimum
13005 // of the input range and output range.
13006
13007 // This means this is also safe for a signed input and unsigned output, since
13008 // a negative input would lead to undefined behavior.
// A signed type contributes one bit less of magnitude (the bool is
// subtracted as 0 or 1).
13009 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13010 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13011 unsigned ActualSize = std::min(InputSize, OutputSize);
13012 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13013
13014 // We can only fold away the float conversion if the input range can be
13015 // represented exactly in the float range.
13016 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
// Result wider than the source: extend.
13017 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13018 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
// NOTE(review): listing line 13019 (the other arm of this conditional
// expression) is absent from this extract.
13020 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13021 }
// Result narrower than the source: truncate.
13022 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13023 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
// Same scalar width: reuse the source bits directly.
13024 return DAG.getBitcast(VT, Src);
13025 }
13026 return SDValue();
13027}
13028
// Combine an FP_TO_SINT node: folds an undef operand to undef, re-issues the
// node under a guard that is not visible here, and otherwise defers to
// FoldIntToFPToInt.
13029SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13030 SDValue N0 = N->getOperand(0);
13031 EVT VT = N->getValueType(0);
13032
13033 // fold (fp_to_sint undef) -> undef
13034 if (N0.isUndef())
13035 return DAG.getUNDEF(VT);
13036
13037 // fold (fp_to_sint c1fp) -> c1
// NOTE(review): listing line 13038 (the condition guarding this return) is
// absent from this extract; presumably a constant-FP operand check -- confirm.
13039 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13040
// Otherwise try to cancel against an int-to-fp conversion feeding this node.
13041 return FoldIntToFPToInt(N, DAG);
13042}
13043
// Combine an FP_TO_UINT node: folds an undef operand to undef, re-issues the
// node under a guard that is not visible here, and otherwise defers to
// FoldIntToFPToInt.
13044SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13045 SDValue N0 = N->getOperand(0);
13046 EVT VT = N->getValueType(0);
13047
13048 // fold (fp_to_uint undef) -> undef
13049 if (N0.isUndef())
13050 return DAG.getUNDEF(VT);
13051
13052 // fold (fp_to_uint c1fp) -> c1
// NOTE(review): listing line 13053 (the condition guarding this return) is
// absent from this extract; presumably a constant-FP operand check -- confirm.
13054 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13055
// Otherwise try to cancel against an int-to-fp conversion feeding this node.
13056 return FoldIntToFPToInt(N, DAG);
13057}
13058
13059SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13060 SDValue N0 = N->getOperand(0);
13061 SDValue N1 = N->getOperand(1);
13062 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13063 EVT VT = N->getValueType(0);
13064
13065 // fold (fp_round c1fp) -> c1fp
13066 if (N0CFP)
13067 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13068
13069 // fold (fp_round (fp_extend x)) -> x
13070 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13071 return N0.getOperand(0);
13072
13073 // fold (fp_round (fp_round x)) -> (fp_round x)
13074 if (N0.getOpcode() == ISD::FP_ROUND) {
13075 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13076 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13077
13078 // Skip this folding if it results in an fp_round from f80 to f16.
13079 //
13080 // f80 to f16 always generates an expensive (and as yet, unimplemented)
13081 // libcall to __truncxfhf2 instead of selecting native f16 conversion
13082 // instructions from f32 or f64. Moreover, the first (value-preserving)
13083 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13084 // x86.
13085 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13086 return SDValue();
13087
13088 // If the first fp_round isn't a value preserving truncation, it might
13089 // introduce a tie in the second fp_round, that wouldn't occur in the
13090 // single-step fp_round we want to fold to.
13091 // In other words, double rounding isn't the same as rounding.
13092 // Also, this is a value preserving truncation iff both fp_round's are.
13093 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13094 SDLoc DL(N);
13095 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13096 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13097 }
13098 }
13099
13100 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13101 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13102 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13103 N0.getOperand(0), N1);
13104 AddToWorklist(Tmp.getNode());
13105 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13106 Tmp, N0.getOperand(1));
13107 }
13108
13109 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13110 return NewVSel;
13111
13112 return SDValue();
13113}
13114
13115SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
13116 SDValue N0 = N->getOperand(0);
13117 EVT VT = N->getValueType(0);
13118 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13119 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13120
13121 // fold (fp_round_inreg c1fp) -> c1fp
13122 if (N0CFP && isTypeLegal(EVT)) {
13123 SDLoc DL(N);
13124 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
13125 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
13126 }
13127
13128 return SDValue();
13129}
13130
// Combine an FP_EXTEND node. Handles a constant operand, an FP16_TO_FP
// operand, an operand that is a value-preserving FP_ROUND, and a normal load
// that can become an extending load; finally tries
// matchVSelectOpSizesWithSetCC.
// Returns the replacement value, or an empty SDValue if nothing applied.
13131SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13132 SDValue N0 = N->getOperand(0);
13133 EVT VT = N->getValueType(0);
13134
13135 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13136 if (N->hasOneUse() &&
13137 N->use_begin()->getOpcode() == ISD::FP_ROUND)
13138 return SDValue();
13139
13140 // fold (fp_extend c1fp) -> c1fp
// NOTE(review): listing line 13141 (the condition guarding this return) is
// absent from this extract.
13142 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13143
13144 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13145 if (N0.getOpcode() == ISD::FP16_TO_FP &&
// NOTE(review): listing line 13146 (rest of this condition) is absent from
// this extract.
13147 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13148
13149 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
13150 // value of X.
13151 if (N0.getOpcode() == ISD::FP_ROUND
13152 && N0.getConstantOperandVal(1) == 1) {
13153 SDValue In = N0.getOperand(0);
// Same type: the round/extend pair cancels completely.
13154 if (In.getValueType() == VT) return In;
// Result narrower than X: a single FP_ROUND, reusing the inner node's flag
// operand.
13155 if (VT.bitsLT(In.getValueType()))
13156 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13157 In, N0.getOperand(1));
// Otherwise extend X directly.
13158 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13159 }
13160
13161 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13162 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13163 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13164 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13165 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13166 LN0->getChain(),
13167 LN0->getBasePtr(), N0.getValueType(),
13168 LN0->getMemOperand());
// N becomes the extending load itself.
13169 CombineTo(N, ExtLoad);
// The old load's value is replaced by a value-preserving round of the
// extending load (flag operand 1); its second result is replaced by
// ExtLoad.getValue(1).
13170 CombineTo(N0.getNode(),
13171 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13172 N0.getValueType(), ExtLoad,
13173 DAG.getIntPtrConstant(1, SDLoc(N0))),
13174 ExtLoad.getValue(1));
13175 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13176 }
13177
13178 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13179 return NewVSel;
13180
13181 return SDValue();
13182}
13183
// Combine an FCEIL node. The only fold visible here re-issues the node under
// a guard that is missing from this extract.
13184SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13185 SDValue N0 = N->getOperand(0);
13186 EVT VT = N->getValueType(0);
13187
13188 // fold (fceil c1) -> fceil(c1)
// NOTE(review): listing line 13189 (the condition guarding this return) is
// absent from this extract; presumably a constant-operand check -- confirm.
13190 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13191
13192 return SDValue();
13193}
13194
// Combine an FTRUNC node: re-issues the node under a guard that is missing
// from this extract, and drops the FTRUNC when its operand was produced by
// one of the rounding opcodes listed in the switch below.
13195SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13196 SDValue N0 = N->getOperand(0);
13197 EVT VT = N->getValueType(0);
13198
13199 // fold (ftrunc c1) -> ftrunc(c1)
// NOTE(review): listing line 13200 (the condition guarding this return) is
// absent from this extract; presumably a constant-operand check -- confirm.
13201 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13202
13203 // fold ftrunc (known rounded int x) -> x
13204 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13205 // likely to be generated to extract integer from a rounded floating value.
13206 switch (N0.getOpcode()) {
13207 default: break;
// For each of these opcodes the operand itself is returned unchanged.
13208 case ISD::FRINT:
13209 case ISD::FTRUNC:
13210 case ISD::FNEARBYINT:
13211 case ISD::FFLOOR:
13212 case ISD::FCEIL:
13213 return N0;
13214 }
13215
13216 return SDValue();
13217}
13218
// Combine an FFLOOR node. The only fold visible here re-issues the node under
// a guard that is missing from this extract.
13219SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13220 SDValue N0 = N->getOperand(0);
13221 EVT VT = N->getValueType(0);
13222
13223 // fold (ffloor c1) -> ffloor(c1)
// NOTE(review): listing line 13224 (the condition guarding this return) is
// absent from this extract; presumably a constant-operand check -- confirm.
13225 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13226
13227 return SDValue();
13228}
13229
13230// FIXME: FNEG and FABS have a lot in common; refactor.
// Combine an FNEG node. Tries: re-issuing the node under a guard missing from
// this extract, pushing the negation into the operand when that is free,
// rewriting fneg(bitcast(int)) as an integer XOR with the sign mask, and
// folding the negation into the constant operand of an FMUL.
// Returns the replacement value, or an empty SDValue if nothing applied.
13231SDValue DAGCombiner::visitFNEG(SDNode *N) {
13232 SDValue N0 = N->getOperand(0);
13233 EVT VT = N->getValueType(0);
13234
13235 // Constant fold FNEG.
// NOTE(review): listing line 13236 (the condition guarding this return) is
// absent from this extract.
13237 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13238
13239 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
13240 &DAG.getTarget().Options, ForCodeSize))
13241 return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13242
13243 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13244 // constant pool values.
13245 if (!TLI.isFNegFree(VT) &&
13246 N0.getOpcode() == ISD::BITCAST &&
13247 N0.getNode()->hasOneUse()) {
13248 SDValue Int = N0.getOperand(0);
13249 EVT IntVT = Int.getValueType();
// Only a scalar integer source is handled; the FP side may still be a
// vector, in which case the mask is splatted below.
13250 if (IntVT.isInteger() && !IntVT.isVector()) {
13251 APInt SignMask;
13252 if (N0.getValueType().isVector()) {
13253 // For a vector, get a mask such as 0x80... per scalar element
13254 // and splat it.
// NOTE(review): listing line 13255 (the per-element SignMask initialization)
// is absent from this extract.
13256 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13257 } else {
13258 // For a scalar, just generate 0x80...
13259 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13260 }
13261 SDLoc DL0(N0);
13262 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13263 DAG.getConstant(SignMask, DL0, IntVT));
13264 AddToWorklist(Int.getNode());
13265 return DAG.getBitcast(VT, Int);
13266 }
13267 }
13268
13269 // (fneg (fmul c, x)) -> (fmul -c, x)
13270 if (N0.getOpcode() == ISD::FMUL &&
13271 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
// Only a constant in operand 1 of the FMUL is considered.
13272 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13273 if (CFP1) {
13274 APFloat CVal = CFP1->getValueAPF();
13275 CVal.changeSign();
13276 if (Level >= AfterLegalizeDAG &&
13277 (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
// NOTE(review): listing line 13278 (rest of this condition) is absent from
// this extract.
13279 return DAG.getNode(
13280 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13281 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13282 N0->getFlags());
13283 }
13284 }
13285
13286 return SDValue();
13287}
13288
13290 APFloat (*Op)(const APFloat &, const APFloat &)) {
// NOTE(review): the first line of this signature (listing line 13289) is
// absent from this extract. The visitFMINNUM/FMAXNUM/FMINIMUM/FMAXIMUM
// wrappers call visitFMinMax(DAG, N, <fn>), so this is presumably that shared
// helper -- confirm against the full file.
//
// Purpose: constant-fold a two-operand FP min/max node using Op when both
// operands are FP constants (or constant splats), and otherwise possibly
// re-issue the node with its operands swapped.
13291 SDValue N0 = N->getOperand(0);
13292 SDValue N1 = N->getOperand(1);
13293 EVT VT = N->getValueType(0);
13294 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13295 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13296
// Both operands constant: evaluate Op on the two APFloat values.
13297 if (N0CFP && N1CFP) {
13298 const APFloat &C0 = N0CFP->getValueAPF();
13299 const APFloat &C1 = N1CFP->getValueAPF();
13300 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13301 }
13302
13303 // Canonicalize to constant on RHS.
// NOTE(review): listing lines 13304-13305 (the condition guarding this
// return) are absent from this extract. The return re-creates the node with
// N1 and N0 swapped.
13306 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13307
13308 return SDValue();
13309}
13310
13311SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13312 return visitFMinMax(DAG, N, minnum);
13313}
13314
13315SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13316 return visitFMinMax(DAG, N, maxnum);
13317}
13318
13319SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13320 return visitFMinMax(DAG, N, minimum);
13321}
13322
13323SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13324 return visitFMinMax(DAG, N, maximum);
13325}
13326
// Combine an FABS node. Tries: re-issuing the node under a guard missing from
// this extract, collapsing fabs(fabs x), looking through FNEG / FCOPYSIGN,
// and rewriting fabs(bitcast(int)) as an integer AND that clears the sign
// bit.
// Returns the replacement value, or an empty SDValue if nothing applied.
13327SDValue DAGCombiner::visitFABS(SDNode *N) {
13328 SDValue N0 = N->getOperand(0);
13329 EVT VT = N->getValueType(0);
13330
13331 // fold (fabs c1) -> fabs(c1)
// NOTE(review): listing line 13332 (the condition guarding this return) is
// absent from this extract.
13333 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13334
13335 // fold (fabs (fabs x)) -> (fabs x)
// The inner FABS node (operand 0 of N) is returned as-is.
13336 if (N0.getOpcode() == ISD::FABS)
13337 return N->getOperand(0);
13338
13339 // fold (fabs (fneg x)) -> (fabs x)
13340 // fold (fabs (fcopysign x, y)) -> (fabs x)
13341 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13342 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13343
13344 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13345 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13346 SDValue Int = N0.getOperand(0);
13347 EVT IntVT = Int.getValueType();
// Only a scalar integer source is handled; the FP side may still be a
// vector, in which case the mask is splatted below.
13348 if (IntVT.isInteger() && !IntVT.isVector()) {
13349 APInt SignMask;
13350 if (N0.getValueType().isVector()) {
13351 // For a vector, get a mask such as 0x7f... per scalar element
13352 // and splat it.
13353 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13354 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13355 } else {
13356 // For a scalar, just generate 0x7f...
13357 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13358 }
13359 SDLoc DL(N0);
13360 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13361 DAG.getConstant(SignMask, DL, IntVT));
13362 AddToWorklist(Int.getNode());
13363 return DAG.getBitcast(N->getValueType(0), Int);
13364 }
13365 }
13366
13367 return SDValue();
13368}
13369
// Combine a BRCOND node. Operands as read here: 0 = chain, 1 = condition,
// 2 = the operand forwarded last to the rebuilt branch node. Either merges a
// SETCC condition into a BR_CC, or replaces a single-use condition with the
// result of rebuildSetCC().
// Returns the replacement node, or an empty SDValue if nothing applied.
13370SDValue DAGCombiner::visitBRCOND(SDNode *N) {
13371 SDValue Chain = N->getOperand(0);
13372 SDValue N1 = N->getOperand(1);
13373 SDValue N2 = N->getOperand(2);
13374
13375 // If N is a constant we could fold this into a fallthrough or unconditional
13376 // branch. However that doesn't happen very often in normal code, because
13377 // Instcombine/SimplifyCFG should have handled the available opportunities.
13378 // If we did this folding here, it would be necessary to update the
13379 // MachineBasicBlock CFG, which is awkward.
13380
13381 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13382 // on the target.
13383 if (N1.getOpcode() == ISD::SETCC &&
// NOTE(review): listing line 13384 (the start of the legality check whose
// argument list ends on the next line) is absent from this extract.
13385 N1.getOperand(0).getValueType())) {
// BR_CC operand order: chain, condition code, LHS, RHS, then N2.
13386 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13387 Chain, N1.getOperand(2),
13388 N1.getOperand(0), N1.getOperand(1), N2);
13389 }
13390
// Only rewrite the condition when this branch is its sole user.
13391 if (N1.hasOneUse()) {
13392 if (SDValue NewN1 = rebuildSetCC(N1))
13393 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13394 }
13395
13396 return SDValue();
13397}
13398
13399SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13400 if (N.getOpcode() == ISD::SRL ||
13401 (N.getOpcode() == ISD::TRUNCATE &&
13402 (N.getOperand(0).hasOneUse() &&
13403 N.getOperand(0).getOpcode() == ISD::SRL))) {
13404 // Look pass the truncate.
13405 if (N.getOpcode() == ISD::TRUNCATE)
13406 N = N.getOperand(0);
13407
13408 // Match this pattern so that we can generate simpler code:
13409 //
13410 // %a = ...
13411 // %b = and i32 %a, 2
13412 // %c = srl i32 %b, 1
13413 // brcond i32 %c ...
13414 //
13415 // into
13416 //
13417 // %a = ...
13418 // %b = and i32 %a, 2
13419 // %c = setcc eq %b, 0
13420 // brcond %c ...
13421 //
13422 // This applies only when the AND constant value has one bit set and the
13423 // SRL constant is equal to the log2 of the AND constant. The back-end is
13424 // smart enough to convert the result into a TEST/JMP sequence.
13425 SDValue Op0 = N.getOperand(0);
13426 SDValue Op1 = N.getOperand(1);
13427
13428 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13429 SDValue AndOp1 = Op0.getOperand(1);
13430
13431 if (AndOp1.getOpcode() == ISD::Constant) {
13432 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13433
13434 if (AndConst.isPowerOf2() &&
13435 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13436 SDLoc DL(N);
13437 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13438 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13439 ISD::SETNE);
13440 }
13441 }
13442 }
13443 }
13444
13445 // Transform br(xor(x, y)) -> br(x != y)
13446 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13447 if (N.getOpcode() == ISD::XOR) {
13448 // Because we may call this on a speculatively constructed
13449 // SimplifiedSetCC Node, we need to simplify this node first.
13450 // Ideally this should be folded into SimplifySetCC and not
13451 // here. For now, grab a handle to N so we don't lose it from
13452 // replacements interal to the visit.
13453 HandleSDNode XORHandle(N);
13454 while (N.getOpcode() == ISD::XOR) {
13455 SDValue Tmp = visitXOR(N.getNode());
13456 // No simplification done.
13457 if (!Tmp.getNode())
13458 break;
13459 // Returning N is form in-visit replacement that may invalidated
13460 // N. Grab value from Handle.
13461 if (Tmp.getNode() == N.getNode())
13462 N = XORHandle.getValue();
13463 else // Node simplified. Try simplifying again.
13464 N = Tmp;
13465 }
13466
13467 if (N.getOpcode() != ISD::XOR)
13468 return N;
13469
13470 SDNode *TheXor = N.getNode();
13471
13472 SDValue Op0 = TheXor->getOperand(0);
13473 SDValue Op1 = TheXor->getOperand(1);
13474
13475 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13476 bool Equal = false;
13477 if (isOneConstant(Op0) && Op0.hasOneUse() &&
13478 Op0.getOpcode() == ISD::XOR) {
13479 TheXor = Op0.getNode();
13480 Equal = true;
13481 }
13482
13483 EVT SetCCVT = N.getValueType();
13484 if (LegalTypes)
13485 SetCCVT = getSetCCResultType(SetCCVT);
13486 // Replace the uses of XOR with SETCC
13487 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13488 Equal ? ISD::SETEQ : ISD::SETNE);
13489 }
13490 }
13491
13492 return SDValue();
13493}
13494
13495// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13496//
// Combine a BR_CC node: simplifies the embedded comparison and, if the
// result is still a SETCC, rebuilds the BR_CC from the simplified operands.
// Returns the replacement node, or an empty SDValue if nothing applied.
13497SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13498 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13499 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13500
13501 // If N is a constant we could fold this into a fallthrough or unconditional
13502 // branch. However that doesn't happen very often in normal code, because
13503 // Instcombine/SimplifyCFG should have handled the available opportunities.
13504 // If we did this folding here, it would be necessary to update the
13505 // MachineBasicBlock CFG, which is awkward.
13506
13507 // Use SimplifySetCC to simplify SETCC's.
// NOTE(review): listing line 13508 (the declaration of Simp and the start of
// the call whose arguments continue below) is absent from this extract.
13509 CondLHS, CondRHS, CC->get(), SDLoc(N),
13510 false);
13511 if (Simp.getNode()) AddToWorklist(Simp.getNode());
13512
13513 // fold to a simpler setcc
// Simp's operands are (LHS, RHS, CC); BR_CC wants (chain, CC, LHS, RHS, dest).
13514 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13515 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13516 N->getOperand(0), Simp.getOperand(2),
13517 Simp.getOperand(0), Simp.getOperand(1),
13518 N->getOperand(4));
13519
13520 return SDValue();
13521}
13522
13523/// Return true if 'Use' is a load or a store that uses N as its base pointer
13524/// and that N may be folded in the load / store addressing mode.
// NOTE(review): the first line of the signature (listing line 13525) is
// absent from this extract. CombineToPreIndexedLoadStore calls
// canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI), so this is
// presumably that helper -- confirm against the full file.
13526 SelectionDAG &DAG,
13527 const TargetLowering &TLI) {
13528 EVT VT;
13529 unsigned AS;
13530
// Use must be a non-indexed load or store whose base pointer is N; record
// its memory type and address space for the legality query below.
13531 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
13532 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13533 return false;
13534 VT = LD->getMemoryVT();
13535 AS = LD->getAddressSpace();
13536 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
13537 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13538 return false;
13539 VT = ST->getMemoryVT();
13540 AS = ST->getAddressSpace();
13541 } else
13542 return false;
13543
// NOTE(review): listing line 13544 (the declaration of AM, the addressing
// mode description filled in below) is absent from this extract.
// Only ADD and SUB address computations are modeled; a constant operand 1
// becomes the base offset (negated for SUB), otherwise Scale is set to 1.
13545 if (N->getOpcode() == ISD::ADD) {
13546 AM.HasBaseReg = true;
13547 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13548 if (Offset)
13549 // [reg +/- imm]
13550 AM.BaseOffs = Offset->getSExtValue();
13551 else
13552 // [reg +/- reg]
13553 AM.Scale = 1;
13554 } else if (N->getOpcode() == ISD::SUB) {
13555 AM.HasBaseReg = true;
13556 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13557 if (Offset)
13558 // [reg +/- imm]
13559 AM.BaseOffs = -Offset->getSExtValue();
13560 else
13561 // [reg +/- reg]
13562 AM.Scale = 1;
13563 } else
13564 return false;
13565
// Let the target decide whether this mode is legal for the accessed type.
13566 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13567 VT.getTypeForEVT(*DAG.getContext()), AS);
13568}
13569
13570/// Try turning a load/store into a pre-indexed load/store when the base
13571/// pointer is an add or subtract and it has other uses besides the load/store.
13572/// After the transformation, the new indexed load/store has effectively folded
13573/// the add/subtract in and all of its other uses are redirected to the
13574/// new load/store.
///
/// Returns true if N was replaced (N and the old pointer node are then handed
/// to deleteAndRecombine), false if the transformation was not performed.
/// NOTE(review): several listing lines of this function are absent from this
/// extract; each gap is marked below.
13575bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Only attempted once the combine level has reached AfterLegalizeDAG.
13576 if (Level < AfterLegalizeDAG)
13577 return false;
13578
13579 bool isLoad = true;
13580 SDValue Ptr;
13581 EVT VT;
13582 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13583 if (LD->isIndexed())
13584 return false;
13585 VT = LD->getMemoryVT();
13586 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
// NOTE(review): listing line 13587 (second half of this legality check) is
// absent from this extract.
13588 return false;
13589 Ptr = LD->getBasePtr();
13590 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13591 if (ST->isIndexed())
13592 return false;
13593 VT = ST->getMemoryVT();
13594 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
// NOTE(review): listing line 13595 (second half of this legality check) is
// absent from this extract.
13596 return false;
13597 Ptr = ST->getBasePtr();
13598 isLoad = false;
13599 } else {
13600 return false;
13601 }
13602
13603 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13604 // out. There is no reason to make this a preinc/predec.
13605 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13606 Ptr.getNode()->hasOneUse())
13607 return false;
13608
13609 // Ask the target to do addressing mode selection.
// NOTE(review): listing lines 13610-13612 (the declarations of BasePtr,
// Offset and AM used below) are absent from this extract.
13613 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13614 return false;
13615
13616 // Backends without true r+i pre-indexed forms may need to pass a
13617 // constant base with a variable offset so that constant coercion
13618 // will work with the patterns in canonical form.
13619 bool Swapped = false;
13620 if (isa<ConstantSDNode>(BasePtr)) {
13621 std::swap(BasePtr, Offset);
13622 Swapped = true;
13623 }
13624
13625 // Don't create an indexed load / store with zero offset.
13626 if (isNullConstant(Offset))
13627 return false;
13628
13629 // Try turning it into a pre-indexed load / store except when:
13630 // 1) The new base ptr is a frame index.
13631 // 2) If N is a store and the new base ptr is either the same as or is a
13632 // predecessor of the value being stored.
13633 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13634 // that would create a cycle.
13635 // 4) All uses are load / store ops that use it as old base ptr.
13636
13637 // Check #1. Preinc'ing a frame index would require copying the stack pointer
13638 // (plus the implicit offset) to a register to preinc anyway.
13639 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13640 return false;
13641
13642 // Check #2.
13643 if (!isLoad) {
13644 SDValue Val = cast<StoreSDNode>(N)->getValue();
13645
13646 // Would require a copy.
13647 if (Val == BasePtr)
13648 return false;
13649
13650 // Would create a cycle.
13651 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13652 return false;
13653 }
13654
13655 // Caches for hasPredecessorHelper.
// NOTE(review): listing lines 13656-13657 (the declarations of Visited and
// Worklist) are absent from this extract.
13658 Worklist.push_back(N);
13659
13660 // If the offset is a constant, there may be other adds of constants that
13661 // can be folded with this one. We should do this to avoid having to keep
13662 // a copy of the original base pointer.
13663 SmallVector<SDNode *, 16> OtherUses;
13664 if (isa<ConstantSDNode>(Offset))
13665 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13666 UE = BasePtr.getNode()->use_end();
13667 UI != UE; ++UI) {
13668 SDUse &Use = UI.getUse();
13669 // Skip the use that is Ptr and uses of other results from BasePtr's
13670 // node (important for nodes that return multiple results).
13671 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13672 continue;
13673
// Users for which the predecessor check succeeds are left alone.
13674 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13675 continue;
13676
// Any remaining user that is not "BasePtr +/- constant" of the matching type
// abandons the whole set: OtherUses is cleared and the scan stops.
13677 if (Use.getUser()->getOpcode() != ISD::ADD &&
13678 Use.getUser()->getOpcode() != ISD::SUB) {
13679 OtherUses.clear();
13680 break;
13681 }
13682
// The user is binary here, so this selects its other operand.
13683 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13684 if (!isa<ConstantSDNode>(Op1)) {
13685 OtherUses.clear();
13686 break;
13687 }
13688
13689 // FIXME: In some cases, we can be smarter about this.
13690 if (Op1.getValueType() != Offset.getValueType()) {
13691 OtherUses.clear();
13692 break;
13693 }
13694
13695 OtherUses.push_back(Use.getUser());
13696 }
13697
// Undo the earlier canonicalizing swap so BasePtr/Offset are back in the
// order the target returned them.
13698 if (Swapped)
13699 std::swap(BasePtr, Offset);
13700
13701 // Now check for #3 and #4.
13702 bool RealUse = false;
13703
13704 for (SDNode *Use : Ptr.getNode()->uses()) {
13705 if (Use == N)
13706 continue;
13707 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13708 return false;
13709
13710 // If Ptr may be folded in addressing mode of other use, then it's
13711 // not profitable to do this transformation.
13712 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13713 RealUse = true;
13714 }
13715
13716 if (!RealUse)
13717 return false;
13718
// NOTE(review): listing line 13719 (the declaration of Result) is absent
// from this extract.
13720 if (isLoad)
13721 Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13722 BasePtr, Offset, AM);
13723 else
// NOTE(review): listing line 13724 (the start of the store-side assignment
// whose arguments continue on the next line) is absent from this extract.
13725 BasePtr, Offset, AM);
13726 ++PreIndexedNodes;
13727 ++NodesCombined;
13728 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13729 Result.getNode()->dump(&DAG); dbgs() << '\n');
13730 WorklistRemover DeadNodes(*this);
// Redirect N's results to the new node. As used in this function, a load's
// Result carries the loaded value at index 0, the updated pointer at index 1
// and N's second result at index 2; a store's Result carries the updated
// pointer at index 0 and N's only result at index 1.
13731 if (isLoad) {
13732 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13733 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13734 } else {
13735 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13736 }
13737
13738 // Finally, since the node is now dead, remove it from the graph.
13739 deleteAndRecombine(N);
13740
// Swap again so that, as during the OtherUses scan, BasePtr names the
// non-constant part and Offset the constant.
13741 if (Swapped)
13742 std::swap(BasePtr, Offset);
13743
13744 // Replace other uses of BasePtr that can be updated to use Ptr
13745 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
// Work out which operand of this user is the constant offset.
13746 unsigned OffsetIdx = 1;
13747 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13748 OffsetIdx = 0;
13749 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13750 BasePtr.getNode() && "Expected BasePtr operand");
13751
13752 // We need to replace ptr0 in the following expression:
13753 // x0 * offset0 + y0 * ptr0 = t0
13754 // knowing that
13755 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13756 //
13757 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13758 // indexed load/store and the expression that needs to be re-written.
13759 //
13760 // Therefore, we have:
13761 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
13762
13763 ConstantSDNode *CN =
13764 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13765 int X0, X1, Y0, Y1;
13766 const APInt &Offset0 = CN->getAPIntValue();
13767 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13768
// Signs: a SUB negates whichever term sits in its operand 1; PRE_DEC
// negates the offset, or the pointer when base and offset were swapped.
13769 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13770 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13771 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13772 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13773
13774 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13775
// CNV = x0 * offset0 - x1 * y0 * y1 * offset1, per the formula above.
13776 APInt CNV = Offset0;
13777 if (X0 < 0) CNV = -CNV;
13778 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13779 else CNV = CNV - Offset1;
13780
13781 SDLoc DL(OtherUses[i]);
13782
13783 // We can now generate the new expression.
13784 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13785 SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13786
13787 SDValue NewUse = DAG.getNode(Opcode,
13788 DL,
13789 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13790 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13791 deleteAndRecombine(OtherUses[i]);
13792 }
13793
13794 // Replace the uses of Ptr with uses of the updated base value.
13795 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13796 deleteAndRecombine(Ptr.getNode());
13797 AddToWorklist(Result.getNode());
13798
13799 return true;
13800}
13801
13802/// Try to combine a load/store with a add/sub of the base pointer node into a
13803/// post-indexed load/store. The transformation folded the add/subtract into the
13804/// new indexed load/store effectively and all of its uses are redirected to the
13805/// new load/store.
13806bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13807 if (Level < AfterLegalizeDAG)
13808 return false;
13809
13810 bool isLoad = true;
13811 SDValue Ptr;
13812 EVT VT;
13813 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13814 if (LD->isIndexed())
13815 return false;
13816 VT = LD->getMemoryVT();
13817 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13819 return false;
13820 Ptr = LD->getBasePtr();
13821 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13822 if (ST->isIndexed())
13823 return false;
13824 VT = ST->getMemoryVT();
13825 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13827 return false;
13828 Ptr = ST->getBasePtr();
13829 isLoad = false;
13830 } else {
13831 return false;
13832 }
13833
13834 if (Ptr.getNode()->hasOneUse())
13835 return false;
13836
13837 for (SDNode *Op : Ptr.getNode()->uses()) {
13838 if (Op == N ||
13839 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13840 continue;
13841
13845 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13846 // Don't create a indexed load / store with zero offset.
13847 if (isNullConstant(Offset))
13848 continue;
13849
13850 // Try turning it into a post-indexed load / store except when
13851 // 1) All uses are load / store ops that use it as base ptr (and
13852 // it may be folded as addressing mmode).
13853 // 2) Op must be independent of N, i.e. Op is neither a predecessor
13854 // nor a successor of N. Otherwise, if Op is folded that would
13855 // create a cycle.
13856
13857 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13858 continue;
13859
13860 // Check for #1.
13861 bool TryNext = false;
13862 for (SDNode *Use : BasePtr.getNode()->uses()) {
13863 if (Use == Ptr.getNode())
13864 continue;
13865
13866 // If all the uses are load / store addresses, then don't do the
13867 // transformation.
13868 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13869 bool RealUse = false;
13870 for (SDNode *UseUse : Use->uses()) {
13871 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13872 RealUse = true;
13873 }
13874
13875 if (!RealUse) {
13876 TryNext = true;
13877 break;
13878 }
13879 }
13880 }
13881
13882 if (TryNext)
13883 continue;
13884
13885 // Check for #2.
13888 // Ptr is predecessor to both N and Op.
13889 Visited.insert(Ptr.getNode());
13890 Worklist.push_back(N);
13891 Worklist.push_back(Op);
13892 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13893 !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13895 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13896 BasePtr, Offset, AM)
13897 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13898 BasePtr, Offset, AM);
13899 ++PostIndexedNodes;
13900 ++NodesCombined;
13901 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13902 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13903 dbgs() << '\n');
13904 WorklistRemover DeadNodes(*this);
13905 if (isLoad) {
13906 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13907 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13908 } else {
13909 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13910 }
13911
13912 // Finally, since the node is now dead, remove it from the graph.
13913 deleteAndRecombine(N);
13914
13915 // Replace the uses of Use with uses of the updated base value.
13917 Result.getValue(isLoad ? 1 : 0));
13918 deleteAndRecombine(Op);
13919 return true;
13920 }
13921 }
13922 }
13923
13924 return false;
13925}
13926
13927/// Return the base-pointer arithmetic from an indexed \p LD.
13928SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13929 ISD::MemIndexedMode AM = LD->getAddressingMode();
13930 assert(AM != ISD::UNINDEXED);
13931 SDValue BP = LD->getOperand(1);
13932 SDValue Inc = LD->getOperand(2);
13933
13934 // Some backends use TargetConstants for load offsets, but don't expect
13935 // TargetConstants in general ADD nodes. We can convert these constants into
13936 // regular Constants (if the constant is not opaque).
13938 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13939 "Cannot split out indexing using opaque target constants");
13940 if (Inc.getOpcode() == ISD::TargetConstant) {
13941 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13942 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13943 ConstInc->getValueType(0));
13944 }
13945
13946 unsigned Opc =
13947 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13948 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13949}
13950
13951static inline int numVectorEltsOrZero(EVT T) {
13952 return T.isVector() ? T.getVectorNumElements() : 0;
13953}
13954
13955bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13956 Val = ST->getValue();
13957 EVT STType = Val.getValueType();
13958 EVT STMemType = ST->getMemoryVT();
13959 if (STType == STMemType)
13960 return true;
13961 if (isTypeLegal(STMemType))
13962 return false; // fail.
13963 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13964 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13965 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13966 return true;
13967 }
13968 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13969 STType.isInteger() && STMemType.isInteger()) {
13970 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13971 return true;
13972 }
13973 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13974 Val = DAG.getBitcast(STMemType, Val);
13975 return true;
13976 }
13977 return false; // fail.
13978}
13979
13980bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13981 EVT LDMemType = LD->getMemoryVT();
13982 EVT LDType = LD->getValueType(0);
13983 assert(Val.getValueType() == LDMemType &&
13984 "Attempting to extend value of non-matching type");
13985 if (LDType == LDMemType)
13986 return true;
13987 if (LDMemType.isInteger() && LDType.isInteger()) {
13988 switch (LD->getExtensionType()) {
13989 case ISD::NON_EXTLOAD:
13990 Val = DAG.getBitcast(LDType, Val);
13991 return true;
13992 case ISD::EXTLOAD:
13993 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13994 return true;
13995 case ISD::SEXTLOAD:
13996 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13997 return true;
13998 case ISD::ZEXTLOAD:
13999 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14000 return true;
14001 }
14002 }
14003 return false;
14004}
14005
14006SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14007 if (OptLevel == CodeGenOpt::None || LD->isVolatile())
14008 return SDValue();
14009 SDValue Chain = LD->getOperand(0);
14010 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14011 if (!ST || ST->isVolatile())
14012 return SDValue();
14013
14014 EVT LDType = LD->getValueType(0);
14015 EVT LDMemType = LD->getMemoryVT();
14016 EVT STMemType = ST->getMemoryVT();
14017 EVT STType = ST->getValue().getValueType();
14018
14019 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14020 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14021 int64_t Offset;
14022 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14023 return SDValue();
14024
14025 // Normalize for Endianness. After this Offset=0 will denote that the least
14026 // significant bit in the loaded value maps to the least significant bit in
14027 // the stored value). With Offset=n (for n > 0) the loaded value starts at the
14028 // n:th least significant byte of the stored value.
14029 if (DAG.getDataLayout().isBigEndian())
14030 Offset = (STMemType.getStoreSizeInBits() -
14031 LDMemType.getStoreSizeInBits()) / 8 - Offset;
14032
14033 // Check that the stored value cover all bits that are loaded.
14034 bool STCoversLD =
14035 (Offset >= 0) &&
14036 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14037
14038 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14039 if (LD->isIndexed()) {
14040 bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
14041 LD->getAddressingMode() == ISD::POST_DEC);
14042 unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
14043 SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
14044 LD->getOperand(1), LD->getOperand(2));
14045 SDValue Ops[] = {Val, Idx, Chain};
14046 return CombineTo(LD, Ops, 3);
14047 }
14048 return CombineTo(LD, Val, Chain);
14049 };
14050
14051 if (!STCoversLD)
14052 return SDValue();
14053
14054 // Memory as copy space (potentially masked).
14055 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14056 // Simple case: Direct non-truncating forwarding
14057 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14058 return ReplaceLd(LD, ST->getValue(), Chain);
14059 // Can we model the truncate and extension with an and mask?
14060 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14061 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14062 // Mask to size of LDMemType
14063 auto Mask =
14065 STMemType.getSizeInBits()),
14066 SDLoc(ST), STType);
14067 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14068 return ReplaceLd(LD, Val, Chain);
14069 }
14070 }
14071
14072 // TODO: Deal with nonzero offset.
14073 if (LD->getBasePtr().isUndef() || Offset != 0)
14074 return SDValue();
14075 // Model necessary truncations / extenstions.
14076 SDValue Val;
14077 // Truncate Value To Stored Memory Size.
14078 do {
14079 if (!getTruncatedStoreValue(ST, Val))
14080 continue;
14081 if (!isTypeLegal(LDMemType))
14082 continue;
14083 if (STMemType != LDMemType) {
14084 // TODO: Support vectors? This requires extract_subvector/bitcast.
14085 if (!STMemType.isVector() && !LDMemType.isVector() &&
14086 STMemType.isInteger() && LDMemType.isInteger())
14087 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14088 else
14089 continue;
14090 }
14091 if (!extendLoadedValueToExtension(LD, Val))
14092 continue;
14093 return ReplaceLd(LD, Val, Chain);
14094 } while (false);
14095
14096 // On failure, cleanup dead nodes we may have created.
14097 if (Val->use_empty())
14098 deleteAndRecombine(Val.getNode());
14099 return SDValue();
14100}
14101
14102SDValue DAGCombiner::visitLOAD(SDNode *N) {
14103 LoadSDNode *LD = cast<LoadSDNode>(N);
14104 SDValue Chain = LD->getChain();
14105 SDValue Ptr = LD->getBasePtr();
14106
14107 // If load is not volatile and there are no uses of the loaded value (and
14108 // the updated indexed value in case of indexed loads), change uses of the
14109 // chain value into uses of the chain input (i.e. delete the dead load).
14110 if (!LD->isVolatile()) {
14111 if (N->getValueType(1) == MVT::Other) {
14112 // Unindexed loads.
14113 if (!N->hasAnyUseOfValue(0)) {
14114 // It's not safe to use the two value CombineTo variant here. e.g.
14115 // v1, chain2 = load chain1, loc
14116 // v2, chain3 = load chain2, loc
14117 // v3 = add v2, c
14118 // Now we replace use of chain2 with chain1. This makes the second load
14119 // isomorphic to the one we are deleting, and thus makes this load live.
14120 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14121 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14122 dbgs() << "\n");
14123 WorklistRemover DeadNodes(*this);
14124 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14125 AddUsersToWorklist(Chain.getNode());
14126 if (N->use_empty())
14127 deleteAndRecombine(N);
14128
14129 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14130 }
14131 } else {
14132 // Indexed loads.
14133 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14134
14135 // If this load has an opaque TargetConstant offset, then we cannot split
14136 // the indexing into an add/sub directly (that TargetConstant may not be
14137 // valid for a different type of node, and we cannot convert an opaque
14138 // target constant into a regular constant).
14139 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
14140 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
14141
14142 if (!N->hasAnyUseOfValue(0) &&
14143 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
14144 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14145 SDValue Index;
14146 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
14147 Index = SplitIndexingFromLoad(LD);
14148 // Try to fold the base pointer arithmetic into subsequent loads and
14149 // stores.
14150 AddUsersToWorklist(N);
14151 } else
14152 Index = DAG.getUNDEF(N->getValueType(1));
14153 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14154 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14155 dbgs() << " and 2 other values\n");
14156 WorklistRemover DeadNodes(*this);
14157 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14158 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14159 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14160 deleteAndRecombine(N);
14161 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14162 }
14163 }
14164 }
14165
14166 // If this load is directly stored, replace the load value with the stored
14167 // value.
14168 if (auto V = ForwardStoreValueToDirectLoad(LD))
14169 return V;
14170
14171 // Try to infer better alignment information than the load already has.
14172 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
14173 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14174 if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
14175 SDValue NewLoad = DAG.getExtLoad(
14176 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14177 LD->getPointerInfo(), LD->getMemoryVT(), Align,
14178 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14179 // NewLoad will always be N as we are only refining the alignment
14180 assert(NewLoad.getNode() == N);
14181 (void)NewLoad;
14182 }
14183 }
14184 }
14185
14186 if (LD->isUnindexed()) {
14187 // Walk up chain skipping non-aliasing memory nodes.
14188 SDValue BetterChain = FindBetterChain(LD, Chain);
14189
14190 // If there is a better chain.
14191 if (Chain != BetterChain) {
14192 SDValue ReplLoad;
14193
14194 // Replace the chain to void dependency.
14195 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14196 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14197 BetterChain, Ptr, LD->getMemOperand());
14198 } else {
14199 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14200 LD->getValueType(0),
14201 BetterChain, Ptr, LD->getMemoryVT(),
14202 LD->getMemOperand());
14203 }
14204
14205 // Create token factor to keep old chain connected.
14206 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14207 MVT::Other, Chain, ReplLoad.getValue(1));
14208
14209 // Replace uses with load result and token factor
14210 return CombineTo(N, ReplLoad.getValue(0), Token);
14211 }
14212 }
14213
14214 // Try transforming N to an indexed load.
14215 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14216 return SDValue(N, 0);
14217
14218 // Try to slice up N to more direct loads if the slices are mapped to
14219 // different register banks or pairing can take place.
14220 if (SliceUpLoad(N))
14221 return SDValue(N, 0);
14222
14223 return SDValue();
14224}
14225
14226namespace {
14227
14228/// Helper structure used to slice a load in smaller loads.
14229/// Basically a slice is obtained from the following sequence:
14230/// Origin = load Ty1, Base
14231/// Shift = srl Ty1 Origin, CstTy Amount
14232/// Inst = trunc Shift to Ty2
14233///
14234/// Then, it will be rewritten into:
14235/// Slice = load SliceTy, Base + SliceOffset
14236/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14237///
14238/// SliceTy is deduced from the number of bits that are actually used to
14239/// build Inst.
14240struct LoadedSlice {
14241 /// Helper structure used to compute the cost of a slice.
14242 struct Cost {
14243 /// Are we optimizing for code size.
14244 bool ForCodeSize;
14245
14246 /// Various cost.
14247 unsigned Loads = 0;
14248 unsigned Truncates = 0;
14249 unsigned CrossRegisterBanksCopies = 0;
14250 unsigned ZExts = 0;
14251 unsigned Shift = 0;
14252
14253 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
14254
14255 /// Get the cost of one isolated slice.
14256 Cost(const LoadedSlice &LS, bool ForCodeSize = false)
14257 : ForCodeSize(ForCodeSize), Loads(1) {
14258 EVT TruncType = LS.Inst->getValueType(0);
14259 EVT LoadedType = LS.getLoadedType();
14260 if (TruncType != LoadedType &&
14261 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14262 ZExts = 1;
14263 }
14264
14265 /// Account for slicing gain in the current cost.
14266 /// Slicing provide a few gains like removing a shift or a
14267 /// truncate. This method allows to grow the cost of the original
14268 /// load with the gain from this slice.
14269 void addSliceGain(const LoadedSlice &LS) {
14270 // Each slice saves a truncate.
14271 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14272 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14273 LS.Inst->getValueType(0)))
14274 ++Truncates;
14275 // If there is a shift amount, this slice gets rid of it.
14276 if (LS.Shift)
14277 ++Shift;
14278 // If this slice can merge a cross register bank copy, account for it.
14279 if (LS.canMergeExpensiveCrossRegisterBankCopy())
14280 ++CrossRegisterBanksCopies;
14281 }
14282
14283 Cost &operator+=(const Cost &RHS) {
14284 Loads += RHS.Loads;
14285 Truncates += RHS.Truncates;
14286 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14287 ZExts += RHS.ZExts;
14288 Shift += RHS.Shift;
14289 return *this;
14290 }
14291
14292 bool operator==(const Cost &RHS) const {
14293 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14294 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14295 ZExts == RHS.ZExts && Shift == RHS.Shift;
14296 }
14297
14298 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14299
14300 bool operator<(const Cost &RHS) const {
14301 // Assume cross register banks copies are as expensive as loads.
14302 // FIXME: Do we want some more target hooks?
14303 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14304 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14305 // Unless we are optimizing for code size, consider the
14306 // expensive operation first.
14307 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14308 return ExpensiveOpsLHS < ExpensiveOpsRHS;
14309 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14310 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14311 }
14312
14313 bool operator>(const Cost &RHS) const { return RHS < *this; }
14314
14315 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14316
14317 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14318 };
14319
14320 // The last instruction that represent the slice. This should be a
14321 // truncate instruction.
14322 SDNode *Inst;
14323
14324 // The original load instruction.
14325 LoadSDNode *Origin;
14326
14327 // The right shift amount in bits from the original load.
14328 unsigned Shift;
14329
14330 // The DAG from which Origin came from.
14331 // This is used to get some contextual information about legal types, etc.
14332 SelectionDAG *DAG;
14333
14334 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14335 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14336 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14337
14338 /// Get the bits used in a chunk of bits \p BitWidth large.
14339 /// \return Result is \p BitWidth and has used bits set to 1 and
14340 /// not used bits set to 0.
14341 APInt getUsedBits() const {
14342 // Reproduce the trunc(lshr) sequence:
14343 // - Start from the truncated value.
14344 // - Zero extend to the desired bit width.
14345 // - Shift left.
14346 assert(Origin && "No original load to compare against.");
14347 unsigned BitWidth = Origin->getValueSizeInBits(0);
14348 assert(Inst && "This slice is not bound to an instruction");
14349 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14350 "Extracted slice is bigger than the whole type!");
14351 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14352 UsedBits.setAllBits();
14353 UsedBits = UsedBits.zext(BitWidth);
14354 UsedBits <<= Shift;
14355 return UsedBits;
14356 }
14357
14358 /// Get the size of the slice to be loaded in bytes.
14359 unsigned getLoadedSize() const {
14360 unsigned SliceSize = getUsedBits().countPopulation();
14361 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14362 return SliceSize / 8;
14363 }
14364
14365 /// Get the type that will be loaded for this slice.
14366 /// Note: This may not be the final type for the slice.
14367 EVT getLoadedType() const {
14368 assert(DAG && "Missing context");
14369 LLVMContext &Ctxt = *DAG->getContext();
14370 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14371 }
14372
14373 /// Get the alignment of the load used for this slice.
14374 unsigned getAlignment() const {
14375 unsigned Alignment = Origin->getAlignment();
14376 uint64_t Offset = getOffsetFromBase();
14377 if (Offset != 0)
14378 Alignment = MinAlign(Alignment, Alignment + Offset);
14379 return Alignment;
14380 }
14381
14382 /// Check if this slice can be rewritten with legal operations.
14383 bool isLegal() const {
14384 // An invalid slice is not legal.
14385 if (!Origin || !Inst || !DAG)
14386 return false;
14387
14388 // Offsets are for indexed load only, we do not handle that.
14389 if (!Origin->getOffset().isUndef())
14390 return false;
14391
14392 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14393
14394 // Check that the type is legal.
14395 EVT SliceType = getLoadedType();
14396 if (!TLI.isTypeLegal(SliceType))
14397 return false;
14398
14399 // Check that the load is legal for this type.
14400 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14401 return false;
14402
14403 // Check that the offset can be computed.
14404 // 1. Check its type.
14405 EVT PtrType = Origin->getBasePtr().getValueType();
14406 if (PtrType == MVT::Untyped || PtrType.isExtended())
14407 return false;
14408
14409 // 2. Check that it fits in the immediate.
14410 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14411 return false;
14412
14413 // 3. Check that the computation is legal.
14414 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14415 return false;
14416
14417 // Check that the zext is legal if it needs one.
14418 EVT TruncateType = Inst->getValueType(0);
14419 if (TruncateType != SliceType &&
14420 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14421 return false;
14422
14423 return true;
14424 }
14425
14426 /// Get the offset in bytes of this slice in the original chunk of
14427 /// bits.
14428 /// \pre DAG != nullptr.
14429 uint64_t getOffsetFromBase() const {
14430 assert(DAG && "Missing context.");
14431 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14432 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14433 uint64_t Offset = Shift / 8;
14434 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14435 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14436 "The size of the original loaded type is not a multiple of a"
14437 " byte.");
14438 // If Offset is bigger than TySizeInBytes, it means we are loading all
14439 // zeros. This should have been optimized before in the process.
14440 assert(TySizeInBytes > Offset &&
14441 "Invalid shift amount for given loaded size");
14442 if (IsBigEndian)
14443 Offset = TySizeInBytes - Offset - getLoadedSize();
14444 return Offset;
14445 }
14446
14447 /// Generate the sequence of instructions to load the slice
14448 /// represented by this object and redirect the uses of this slice to
14449 /// this new sequence of instructions.
14450 /// \pre this->Inst && this->Origin are valid Instructions and this
14451 /// object passed the legal check: LoadedSlice::isLegal returned true.
14452 /// \return The last instruction of the sequence used to load the slice.
14453 SDValue loadSlice() const {
14454 assert(Inst && Origin && "Unable to replace a non-existing slice.");
14455 const SDValue &OldBaseAddr = Origin->getBasePtr();
14456 SDValue BaseAddr = OldBaseAddr;
14457 // Get the offset in that chunk of bytes w.r.t. the endianness.
14458 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14459 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14460 if (Offset) {
14461 // BaseAddr = BaseAddr + Offset.
14462 EVT ArithType = BaseAddr.getValueType();
14463 SDLoc DL(Origin);
14464 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14465 DAG->getConstant(Offset, DL, ArithType));
14466 }
14467
14468 // Create the type of the loaded slice according to its size.
14469 EVT SliceType = getLoadedType();
14470
14471 // Create the load for the slice.
14472 SDValue LastInst =
14473 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14474 Origin->getPointerInfo().getWithOffset(Offset),
14475 getAlignment(), Origin->getMemOperand()->getFlags());
14476 // If the final type is not the same as the loaded type, this means that
14477 // we have to pad with zero. Create a zero extend for that.
14478 EVT FinalType = Inst->getValueType(0);
14479 if (SliceType != FinalType)
14480 LastInst =
14481 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14482 return LastInst;
14483 }
14484
14485 /// Check if this slice can be merged with an expensive cross register
14486 /// bank copy. E.g.,
14487 /// i = load i32
14488 /// f = bitcast i32 i to float
14489 bool canMergeExpensiveCrossRegisterBankCopy() const {
14490 if (!Inst || !Inst->hasOneUse())
14491 return false;
14492 SDNode *Use = *Inst->use_begin();
14493 if (Use->getOpcode() != ISD::BITCAST)
14494 return false;
14495 assert(DAG && "Missing context");
14496 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14497 EVT ResVT = Use->getValueType(0);
14498 const TargetRegisterClass *ResRC =
14499 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14500 const TargetRegisterClass *ArgRC =
14501 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14502 Use->getOperand(0)->isDivergent());
14503 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14504 return false;
14505
14506 // At this point, we know that we perform a cross-register-bank copy.
14507 // Check if it is expensive.
14509 // Assume bitcasts are cheap, unless both register classes do not
14510 // explicitly share a common sub class.
14511 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14512 return false;
14513
14514 // Check if it will be merged with the load.
14515 // 1. Check the alignment constraint.
14516 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14517 ResVT.getTypeForEVT(*DAG->getContext()));
14518
14519 if (RequiredAlignment > getAlignment())
14520 return false;
14521
14522 // 2. Check that the load is a legal operation for that type.
14523 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14524 return false;
14525
14526 // 3. Check that we do not have a zext in the way.
14527 if (Inst->getValueType(0) != getLoadedType())
14528 return false;
14529
14530 return true;
14531 }
14532};
14533
14534} // end anonymous namespace
14535
14536/// Check that all bits set in \p UsedBits form a dense region, i.e.,
14537/// \p UsedBits looks like 0..0 1..1 0..0.
14538static bool areUsedBitsDense(const APInt &UsedBits) {
14539 // If all the bits are one, this is dense!
14540 if (UsedBits.isAllOnesValue())
14541 return true;
14542
14543 // Get rid of the unused bits on the right.
14544 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14545 // Get rid of the unused bits on the left.
14546 if (NarrowedUsedBits.countLeadingZeros())
14547 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14548 // Check that the chunk of bits is completely used.
14549 return NarrowedUsedBits.isAllOnesValue();
14550}
14551
14552/// Check whether or not \p First and \p Second are next to each other
14553/// in memory. This means that there is no hole between the bits loaded
14554/// by \p First and the bits loaded by \p Second.
14555static bool areSlicesNextToEachOther(const LoadedSlice &First,
14556 const LoadedSlice &Second) {
14557 assert(First.Origin == Second.Origin && First.Origin &&
14558 "Unable to match different memory origins.");
14559 APInt UsedBits = First.getUsedBits();
14560 assert((UsedBits & Second.getUsedBits()) == 0 &&
14561 "Slices are not supposed to overlap.");
14562 UsedBits |= Second.getUsedBits();
14563 return areUsedBitsDense(UsedBits);
14564}
14565
14566/// Adjust the \p GlobalLSCost according to the target
14567/// paring capabilities and the layout of the slices.
14568/// \pre \p GlobalLSCost should account for at least as many loads as
14569/// there is in the slices in \p LoadedSlices.
14571 LoadedSlice::Cost &GlobalLSCost) {
14572 unsigned NumberOfSlices = LoadedSlices.size();
14573 // If there is less than 2 elements, no pairing is possible.
14574 if (NumberOfSlices < 2)
14575 return;
14576
14577 // Sort the slices so that elements that are likely to be next to each
14578 // other in memory are next to each other in the list.
14579 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14580 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14581 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14582 });
14583 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14584 // First (resp. Second) is the first (resp. Second) potentially candidate
14585 // to be placed in a paired load.
14586 const LoadedSlice *First = nullptr;
14587 const LoadedSlice *Second = nullptr;
14588 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14589 // Set the beginning of the pair.
14590 First = Second) {
14591 Second = &LoadedSlices[CurrSlice];
14592
14593 // If First is NULL, it means we start a new pair.
14594 // Get to the next slice.
14595 if (!First)
14596 continue;
14597
14598 EVT LoadedType = First->getLoadedType();
14599
14600 // If the types of the slices are different, we cannot pair them.
14601 if (LoadedType != Second->getLoadedType())
14602 continue;
14603
14604 // Check if the target supplies paired loads for this type.
14605 unsigned RequiredAlignment = 0;
14606 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14607 // move to the next pair, this type is hopeless.
14608 Second = nullptr;
14609 continue;
14610 }
14611 // Check if we meet the alignment requirement.
14612 if (RequiredAlignment > First->getAlignment())
14613 continue;
14614
14615 // Check that both loads are next to each other in memory.
14616 if (!areSlicesNextToEachOther(*First, *Second))
14617 continue;
14618
14619 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14620 --GlobalLSCost.Loads;
14621 // Move to the next pair.
14622 Second = nullptr;
14623 }
14624}
14625
14626/// Check the profitability of all involved LoadedSlice.
14627/// Currently, it is considered profitable if there is exactly two
14628/// involved slices (1) which are (2) next to each other in memory, and
14629/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14630///
14631/// Note: The order of the elements in \p LoadedSlices may be modified, but not
14632/// the elements themselves.
14633///
14634/// FIXME: When the cost model will be mature enough, we can relax
14635/// constraints (1) and (2).
14637 const APInt &UsedBits, bool ForCodeSize) {
14638 unsigned NumberOfSlices = LoadedSlices.size();
14640 return NumberOfSlices > 1;
14641
14642 // Check (1).
14643 if (NumberOfSlices != 2)
14644 return false;
14645
14646 // Check (2).
14647 if (!areUsedBitsDense(UsedBits))
14648 return false;
14649
14650 // Check (3).
14651 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14652 // The original code has one big load.
14653 OrigCost.Loads = 1;
14654 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14655 const LoadedSlice &LS = LoadedSlices[CurrSlice];
14656 // Accumulate the cost of all the slices.
14657 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14658 GlobalSlicingCost += SliceCost;
14659
14660 // Account as cost in the original configuration the gain obtained
14661 // with the current slices.
14662 OrigCost.addSliceGain(LS);
14663 }
14664
14665 // If the target supports paired load, adjust the cost accordingly.
14666 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14667 return OrigCost > GlobalSlicingCost;
14668}
14669
14670/// If the given load, \p LI, is used only by trunc or trunc(lshr)
14671/// operations, split it in the various pieces being extracted.
14672///
14673/// This sort of thing is introduced by SROA.
14674/// This slicing takes care not to insert overlapping loads.
14675/// \pre LI is a simple load (i.e., not an atomic or volatile load).
14676bool DAGCombiner::SliceUpLoad(SDNode *N) {
14677 if (Level < AfterLegalizeDAG)
14678 return false;
14679
14680 LoadSDNode *LD = cast<LoadSDNode>(N);
14681 if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14682 !LD->getValueType(0).isInteger())
14683 return false;
14684
14685 // Keep track of already used bits to detect overlapping values.
14686 // In that case, we will just abort the transformation.
14687 APInt UsedBits(LD->getValueSizeInBits(0), 0);
14688
14689 SmallVector<LoadedSlice, 4> LoadedSlices;
14690
14691 // Check if this load is used as several smaller chunks of bits.
14692 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14693 // of computation for each trunc.
14694 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14695 UI != UIEnd; ++UI) {
14696 // Skip the uses of the chain.
14697 if (UI.getUse().getResNo() != 0)
14698 continue;
14699
14700 SDNode *User = *UI;
14701 unsigned Shift = 0;
14702
14703 // Check if this is a trunc(lshr).
14704 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14705 isa<ConstantSDNode>(User->getOperand(1))) {
14706 Shift = User->getConstantOperandVal(1);
14707 User = *User->use_begin();
14708 }
14709
14710 // At this point, User is a Truncate, iff we encountered, trunc or
14711 // trunc(lshr).
14712 if (User->getOpcode() != ISD::TRUNCATE)
14713 return false;
14714
14715 // The width of the type must be a power of 2 and greater than 8-bits.
14716 // Otherwise the load cannot be represented in LLVM IR.
14717 // Moreover, if we shifted with a non-8-bits multiple, the slice
14718 // will be across several bytes. We do not support that.
14719 unsigned Width = User->getValueSizeInBits(0);
14720 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14721 return false;
14722
14723 // Build the slice for this chain of computations.
14724 LoadedSlice LS(User, LD, Shift, &DAG);
14725 APInt CurrentUsedBits = LS.getUsedBits();
14726
14727 // Check if this slice overlaps with another.
14728 if ((CurrentUsedBits & UsedBits) != 0)
14729 return false;
14730 // Update the bits used globally.
14731 UsedBits |= CurrentUsedBits;
14732
14733 // Check if the new slice would be legal.
14734 if (!LS.isLegal())
14735 return false;
14736
14737 // Record the slice.
14738 LoadedSlices.push_back(LS);
14739 }
14740
14741 // Abort slicing if it does not seem to be profitable.
14742 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14743 return false;
14744
14745 ++SlicedLoads;
14746
14747 // Rewrite each chain to use an independent load.
14748 // By construction, each chain can be represented by a unique load.
14749
14750 // Prepare the argument for the new token factor for all the slices.
14751 SmallVector<SDValue, 8> ArgChains;
14753 LSIt = LoadedSlices.begin(),
14754 LSItEnd = LoadedSlices.end();
14755 LSIt != LSItEnd; ++LSIt) {
14756 SDValue SliceInst = LSIt->loadSlice();
14757 CombineTo(LSIt->Inst, SliceInst, true);
14758 if (SliceInst.getOpcode() != ISD::LOAD)
14759 SliceInst = SliceInst.getOperand(0);
14760 assert(SliceInst->getOpcode() == ISD::LOAD &&
14761 "It takes more than a zext to get to the loaded slice!!");
14762 ArgChains.push_back(SliceInst.getValue(1));
14763 }
14764
14766 ArgChains);
14767 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14768 AddToWorklist(Chain.getNode());
14769 return true;
14770}
14771
14772/// Check to see if V is (and load (ptr), imm), where the load is having
14773/// specific bytes cleared out. If so, return the byte size being masked out
14774/// and the shift amount.
14775static std::pair<unsigned, unsigned>
14777 std::pair<unsigned, unsigned> Result(0, 0);
14778
14779 // Check for the structure we're looking for.
14780 if (V->getOpcode() != ISD::AND ||
14781 !isa<ConstantSDNode>(V->getOperand(1)) ||
14783 return Result;
14784
14785 // Check the chain and pointer.
14786 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14787 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
14788
14789 // This only handles simple types.
14790 if (V.getValueType() != MVT::i16 &&
14791 V.getValueType() != MVT::i32 &&
14792 V.getValueType() != MVT::i64)
14793 return Result;
14794
14795 // Check the constant mask. Invert it so that the bits being masked out are
14796 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
14797 // follow the sign bit for uniformity.
14798 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14799 unsigned NotMaskLZ = countLeadingZeros(NotMask);
14800 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
14801 unsigned NotMaskTZ = countTrailingZeros(NotMask);
14802 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
14803 if (NotMaskLZ == 64) return Result; // All zero mask.
14804
14805 // See if we have a continuous run of bits. If so, we have 0*1+0*
14806 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14807 return Result;
14808
14809 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14810 if (V.getValueType() != MVT::i64 && NotMaskLZ)
14811 NotMaskLZ -= 64-V.getValueSizeInBits();
14812
14813 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14814 switch (MaskedBytes) {
14815 case 1:
14816 case 2:
14817 case 4: break;
14818 default: return Result; // All one mask, or 5-byte mask.
14819 }
14820
14821 // Verify that the first bit starts at a multiple of mask so that the access
14822 // is aligned the same as the access width.
14823 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14824
14825 // For narrowing to be valid, it must be the case that the load the
14826 // immediately preceding memory operation before the store.
14827 if (LD == Chain.getNode())
14828 ; // ok.
14829 else if (Chain->getOpcode() == ISD::TokenFactor &&
14830 SDValue(LD, 1).hasOneUse()) {
14831 // LD has only 1 chain use so they are no indirect dependencies.
14832 bool isOk = false;
14833 for (const SDValue &ChainOp : Chain->op_values())
14834 if (ChainOp.getNode() == LD) {
14835 isOk = true;
14836 break;
14837 }
14838 if (!isOk)
14839 return Result;
14840 } else
14841 return Result; // Fail.
14842
14843 Result.first = MaskedBytes;
14844 Result.second = NotMaskTZ/8;
14845 return Result;
14846}
14847
14848/// Check to see if IVal is something that provides a value as specified by
14849/// MaskInfo. If so, replace the specified store with a narrower store of
14850/// truncated IVal.
14851static SDNode *
14852ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14853 SDValue IVal, StoreSDNode *St,
14854 DAGCombiner *DC) {
14855 unsigned NumBytes = MaskInfo.first;
14856 unsigned ByteShift = MaskInfo.second;
14857 SelectionDAG &DAG = DC->getDAG();
14858
14859 // Check to see if IVal is all zeros in the part being masked in by the 'or'
14860 // that uses this. If not, this is not a replacement.
14861 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14862 ByteShift*8, (ByteShift+NumBytes)*8);
14863 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14864
14865 // Check that it is legal on the target to do this. It is legal if the new
14866 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14867 // legalization.
14868 MVT VT = MVT::getIntegerVT(NumBytes*8);
14869 if (!DC->isTypeLegal(VT))
14870 return nullptr;
14871
14872 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
14873 // shifted by ByteShift and truncated down to NumBytes.
14874 if (ByteShift) {
14875 SDLoc DL(IVal);
14876 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14877 DAG.getConstant(ByteShift*8, DL,
14878 DC->getShiftAmountTy(IVal.getValueType())));
14879 }
14880
14881 // Figure out the offset for the store and the alignment of the access.
14882 unsigned StOffset;
14883 unsigned NewAlign = St->getAlignment();
14884
14885 if (DAG.getDataLayout().isLittleEndian())
14886 StOffset = ByteShift;
14887 else
14888 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14889
14890 SDValue Ptr = St->getBasePtr();
14891 if (StOffset) {
14892 SDLoc DL(IVal);
14893 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14894 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14895 NewAlign = MinAlign(NewAlign, StOffset);
14896 }
14897
14898 // Truncate down to the new size.
14899 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14900
14901 ++OpsNarrowed;
14902 return DAG
14903 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14904 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14905 .getNode();
14906}
14907
14908/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14909/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14910/// narrowing the load and store if it would end up being a win for performance
14911/// or code size.
14912SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14913 StoreSDNode *ST = cast<StoreSDNode>(N);
14914 if (ST->isVolatile())
14915 return SDValue();
14916
14917 SDValue Chain = ST->getChain();
14918 SDValue Value = ST->getValue();
14919 SDValue Ptr = ST->getBasePtr();
14920 EVT VT = Value.getValueType();
14921
14922 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14923 return SDValue();
14924
14925 unsigned Opc = Value.getOpcode();
14926
14927 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14928 // is a byte mask indicating a consecutive number of bytes, check to see if
14929 // Y is known to provide just those bytes. If so, we try to replace the
14930 // load + replace + store sequence with a single (narrower) store, which makes
14931 // the load dead.
14932 if (Opc == ISD::OR) {
14933 std::pair<unsigned, unsigned> MaskedLoad;
14934 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14935 if (MaskedLoad.first)
14936 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14937 Value.getOperand(1), ST,this))
14938 return SDValue(NewST, 0);
14939
14940 // Or is commutative, so try swapping X and Y.
14941 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14942 if (MaskedLoad.first)
14943 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14944 Value.getOperand(0), ST,this))
14945 return SDValue(NewST, 0);
14946 }
14947
14948 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14949 Value.getOperand(1).getOpcode() != ISD::Constant)
14950 return SDValue();
14951
14952 SDValue N0 = Value.getOperand(0);
14953 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14954 Chain == SDValue(N0.getNode(), 1)) {
14955 LoadSDNode *LD = cast<LoadSDNode>(N0);
14956 if (LD->getBasePtr() != Ptr ||
14957 LD->getPointerInfo().getAddrSpace() !=
14958 ST->getPointerInfo().getAddrSpace())
14959 return SDValue();
14960
14961 // Find the type to narrow it the load / op / store to.
14962 SDValue N1 = Value.getOperand(1);
14963 unsigned BitWidth = N1.getValueSizeInBits();
14964 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14965 if (Opc == ISD::AND)
14966 Imm ^= APInt::getAllOnesValue(BitWidth);
14967 if (Imm == 0 || Imm.isAllOnesValue())
14968 return SDValue();
14969 unsigned ShAmt = Imm.countTrailingZeros();
14970 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14971 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14972 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14973 // The narrowing should be profitable, the load/store operation should be
14974 // legal (or custom) and the store size should be equal to the NewVT width.
14975 while (NewBW < BitWidth &&
14976 (NewVT.getStoreSizeInBits() != NewBW ||
14977 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14978 !TLI.isNarrowingProfitable(VT, NewVT))) {
14979 NewBW = NextPowerOf2(NewBW);
14980 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14981 }
14982 if (NewBW >= BitWidth)
14983 return SDValue();
14984
14985 // If the lsb changed does not start at the type bitwidth boundary,
14986 // start at the previous one.
14987 if (ShAmt % NewBW)
14988 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14989 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14990 std::min(BitWidth, ShAmt + NewBW));
14991 if ((Imm & Mask) == Imm) {
14992 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14993 if (Opc == ISD::AND)
14994 NewImm ^= APInt::getAllOnesValue(NewBW);
14995 uint64_t PtrOff = ShAmt / 8;
14996 // For big endian targets, we need to adjust the offset to the pointer to
14997 // load the correct bytes.
14998 if (DAG.getDataLayout().isBigEndian())
14999 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15000
15001 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
15002 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15003 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
15004 return SDValue();
15005
15006 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
15007 Ptr.getValueType(), Ptr,
15008 DAG.getConstant(PtrOff, SDLoc(LD),
15009 Ptr.getValueType()));
15010 SDValue NewLD =
15011 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15012 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15013 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15014 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15015 DAG.getConstant(NewImm, SDLoc(Value),
15016 NewVT));
15017 SDValue NewST =
15018 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15019 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15020
15021 AddToWorklist(NewPtr.getNode());
15022 AddToWorklist(NewLD.getNode());
15023 AddToWorklist(NewVal.getNode());
15024 WorklistRemover DeadNodes(*this);
15025 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15026 ++OpsNarrowed;
15027 return NewST;
15028 }
15029 }
15030
15031 return SDValue();
15032}
15033
15034/// For a given floating point load / store pair, if the load value isn't used
15035/// by any other operations, then consider transforming the pair to integer
15036/// load / store operations if the target deems the transformation profitable.
15037SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15038 StoreSDNode *ST = cast<StoreSDNode>(N);
15039 SDValue Value = ST->getValue();
15040 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15041 Value.hasOneUse()) {
15042 LoadSDNode *LD = cast<LoadSDNode>(Value);
15043 EVT VT = LD->getMemoryVT();
15044 if (!VT.isFloatingPoint() ||
15045 VT != ST->getMemoryVT() ||
15046 LD->isNonTemporal() ||
15047 ST->isNonTemporal() ||
15048 LD->getPointerInfo().getAddrSpace() != 0 ||
15049 ST->getPointerInfo().getAddrSpace() != 0)
15050 return SDValue();
15051
15052 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15053 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15054 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15057 return SDValue();
15058
15059 unsigned LDAlign = LD->getAlignment();
15060 unsigned STAlign = ST->getAlignment();
15061 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15062 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15063 if (LDAlign < ABIAlign || STAlign < ABIAlign)
15064 return SDValue();
15065
15066 SDValue NewLD =
15067 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15068 LD->getPointerInfo(), LDAlign);
15069
15070 SDValue NewST =
15071 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15072 ST->getPointerInfo(), STAlign);
15073
15074 AddToWorklist(NewLD.getNode());
15075 AddToWorklist(NewST.getNode());
15076 WorklistRemover DeadNodes(*this);
15077 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15078 ++LdStFP2Int;
15079 return NewST;
15080 }
15081
15082 return SDValue();
15083}
15084
15085// This is a helper function for visitMUL to check the profitability
15086// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15087// MulNode is the original multiply, AddNode is (add x, c1),
15088// and ConstNode is c2.
15089//
15090// If the (add x, c1) has multiple uses, we could increase
15091// the number of adds if we make this transformation.
15092// It would only be worth doing this if we can remove a
15093// multiply in the process. Check for that here.
15094// To illustrate:
15095// (A + c1) * c3
15096// (A + c2) * c3
15097// We're checking for cases where we have common "c3 * A" expressions.
15098bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15099 SDValue &AddNode,
15100 SDValue &ConstNode) {
15101 APInt Val;
15102
15103 // If the add only has one use, this would be OK to do.
15104 if (AddNode.getNode()->hasOneUse())
15105 return true;
15106
15107 // Walk all the users of the constant with which we're multiplying.
15108 for (SDNode *Use : ConstNode->uses()) {
15109 if (Use == MulNode) // This use is the one we're on right now. Skip it.
15110 continue;
15111
15112 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15113 SDNode *OtherOp;
15114 SDNode *MulVar = AddNode.getOperand(0).getNode();
15115
15116 // OtherOp is what we're multiplying against the constant.
15117 if (Use->getOperand(0) == ConstNode)
15118 OtherOp = Use->getOperand(1).getNode();
15119 else
15120 OtherOp = Use->getOperand(0).getNode();
15121
15122 // Check to see if multiply is with the same operand of our "add".
15123 //
15124 // ConstNode = CONST
15125 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
15126 // ...
15127 // AddNode = (A + c1) <-- MulVar is A.
15128 // = AddNode * ConstNode <-- current visiting instruction.
15129 //
15130 // If we make this transformation, we will have a common
15131 // multiply (ConstNode * A) that we can save.
15132 if (OtherOp == MulVar)
15133 return true;
15134
15135 // Now check to see if a future expansion will give us a common
15136 // multiply.
15137 //
15138 // ConstNode = CONST
15139 // AddNode = (A + c1)
15140 // ... = AddNode * ConstNode <-- current visiting instruction.
15141 // ...
15142 // OtherOp = (A + c2)
15143 // Use = OtherOp * ConstNode <-- visiting Use.
15144 //
15145 // If we make this transformation, we will have a common
15146 // multiply (CONST * A) after we also do the same transformation
15147 // to the "t2" instruction.
15148 if (OtherOp->getOpcode() == ISD::ADD &&
15150 OtherOp->getOperand(0).getNode() == MulVar)
15151 return true;
15152 }
15153 }
15154
15155 // Didn't find a case where this would be profitable.
15156 return false;
15157}
15158
15159SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15160 unsigned NumStores) {
15163 SDLoc StoreDL(StoreNodes[0].MemNode);
15164
15165 for (unsigned i = 0; i < NumStores; ++i) {
15166 Visited.insert(StoreNodes[i].MemNode);
15167 }
15168
15169 // don't include nodes that are children or repeated nodes.
15170 for (unsigned i = 0; i < NumStores; ++i) {
15171 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15172 Chains.push_back(StoreNodes[i].MemNode->getChain());
15173 }
15174
15175 assert(Chains.size() > 0 && "Chain should have generated a chain");
15176 return DAG.getTokenFactor(StoreDL, Chains);
15177}
15178
15179bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
15180 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15181 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15182 // Make sure we have something to merge.
15183 if (NumStores < 2)
15184 return false;
15185
15186 // The latest Node in the DAG.
15187 SDLoc DL(StoreNodes[0].MemNode);
15188
15189 int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
15190 unsigned SizeInBits = NumStores * ElementSizeBits;
15191 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15192
15193 EVT StoreTy;
15194 if (UseVector) {
15195 unsigned Elts = NumStores * NumMemElts;
15196 // Get the type for the merged vector store.
15197 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15198 } else
15199 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15200
15201 SDValue StoredVal;
15202 if (UseVector) {
15203 if (IsConstantSrc) {
15204 SmallVector<SDValue, 8> BuildVector;
15205 for (unsigned I = 0; I != NumStores; ++I) {
15206 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15207 SDValue Val = St->getValue();
15208 // If constant is of the wrong type, convert it now.
15209 if (MemVT != Val.getValueType()) {
15210 Val = peekThroughBitcasts(Val);
15211 // Deal with constants of wrong size.
15212 if (ElementSizeBits != Val.getValueSizeInBits()) {
15213 EVT IntMemVT =
15215 if (isa<ConstantFPSDNode>(Val)) {
15216 // Not clear how to truncate FP values.
15217 return false;
15218 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15219 Val = DAG.getConstant(C->getAPIntValue()
15220 .zextOrTrunc(Val.getValueSizeInBits())
15221 .zextOrTrunc(ElementSizeBits),
15222 SDLoc(C), IntMemVT);
15223 }
15224 // Make sure correctly size type is the correct type.
15225 Val = DAG.getBitcast(MemVT, Val);
15226 }
15227 BuildVector.push_back(Val);
15228 }
15229 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15231 DL, StoreTy, BuildVector);
15232 } else {
15234 for (unsigned i = 0; i < NumStores; ++i) {
15235 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15237 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15238 // type MemVT. If the underlying value is not the correct
15239 // type, but it is an extraction of an appropriate vector we
15240 // can recast Val to be of the correct type. This may require
15241 // converting between EXTRACT_VECTOR_ELT and
15242 // EXTRACT_SUBVECTOR.
15243 if ((MemVT != Val.getValueType()) &&
15246 EVT MemVTScalarTy = MemVT.getScalarType();
15247 // We may need to add a bitcast here to get types to line up.
15248 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15249 Val = DAG.getBitcast(MemVT, Val);
15250 } else {
15251 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15253 SDValue Vec = Val.getOperand(0);
15254 SDValue Idx = Val.getOperand(1);
15255 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15256 }
15257 }
15258 Ops.push_back(Val);
15259 }
15260
15261 // Build the extracted vector elements back into a vector.
15262 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15264 DL, StoreTy, Ops);
15265 }
15266 } else {
15267 // We should always use a vector store when merging extracted vector
15268 // elements, so this path implies a store of constants.
15269 assert(IsConstantSrc && "Merged vector elements should use vector store");
15270
15271 APInt StoreInt(SizeInBits, 0);
15272
15273 // Construct a single integer constant which is made of the smaller
15274 // constant inputs.
15275 bool IsLE = DAG.getDataLayout().isLittleEndian();
15276 for (unsigned i = 0; i < NumStores; ++i) {
15277 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15278 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15279
15280 SDValue Val = St->getValue();
15281 Val = peekThroughBitcasts(Val);
15282 StoreInt <<= ElementSizeBits;
15283 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15284 StoreInt |= C->getAPIntValue()
15285 .zextOrTrunc(ElementSizeBits)
15286 .zextOrTrunc(SizeInBits);
15287 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15288 StoreInt |= C->getValueAPF()
15289 .bitcastToAPInt()
15290 .zextOrTrunc(ElementSizeBits)
15291 .zextOrTrunc(SizeInBits);
15292 // If fp truncation is necessary give up for now.
15293 if (MemVT.getSizeInBits() != ElementSizeBits)
15294 return false;
15295 } else {
15296 llvm_unreachable("Invalid constant element type");
15297 }
15298 }
15299
15300 // Create the new Load and Store operations.
15301 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15302 }
15303
15304 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15305 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15306
15307 // make sure we use trunc store if it's necessary to be legal.
15308 SDValue NewStore;
15309 if (!UseTrunc) {
15310 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15311 FirstInChain->getPointerInfo(),
15312 FirstInChain->getAlignment());
15313 } else { // Must be realized as a trunc store
15314 EVT LegalizedStoredValTy =
15315 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15316 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15317 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15318 SDValue ExtendedStoreVal =
15319 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15320 LegalizedStoredValTy);
15321 NewStore = DAG.getTruncStore(
15322 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15323 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15324 FirstInChain->getAlignment(),
15325 FirstInChain->getMemOperand()->getFlags());
15326 }
15327
15328 // Replace all merged stores with the new store.
15329 for (unsigned i = 0; i < NumStores; ++i)
15330 CombineTo(StoreNodes[i].MemNode, NewStore);
15331
15332 AddToWorklist(NewChain.getNode());
15333 return true;
15334}
15335
15336void DAGCombiner::getStoreMergeCandidates(
15337 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15338 SDNode *&RootNode) {
15339 // This holds the base pointer, index, and the offset in bytes from the base
15340 // pointer.
15342 EVT MemVT = St->getMemoryVT();
15343
15345 // We must have a base and an offset.
15346 if (!BasePtr.getBase().getNode())
15347 return;
15348
15349 // Do not handle stores to undef base pointers.
15350 if (BasePtr.getBase().isUndef())
15351 return;
15352
15353 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15354 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15356 bool IsLoadSrc = isa<LoadSDNode>(Val);
15357 BaseIndexOffset LBasePtr;
15358 // Match on loadbaseptr if relevant.
15359 EVT LoadVT;
15360 if (IsLoadSrc) {
15361 auto *Ld = cast<LoadSDNode>(Val);
15362 LBasePtr = BaseIndexOffset::match(Ld, DAG);
15363 LoadVT = Ld->getMemoryVT();
15364 // Load and store should be the same type.
15365 if (MemVT != LoadVT)
15366 return;
15367 // Loads must only have one use.
15368 if (!Ld->hasNUsesOfValue(1, 0))
15369 return;
15370 // The memory operands must not be volatile/indexed.
15371 if (Ld->isVolatile() || Ld->isIndexed())
15372 return;
15373 }
15374 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15375 int64_t &Offset) -> bool {
15376 // The memory operands must not be volatile/indexed.
15377 if (Other->isVolatile() || Other->isIndexed())
15378 return false;
15379 // Don't mix temporal stores with non-temporal stores.
15380 if (St->isNonTemporal() != Other->isNonTemporal())
15381 return false;
15382 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15383 // Allow merging constants of different types as integers.
15384 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15385 : Other->getMemoryVT() != MemVT;
15386 if (IsLoadSrc) {
15387 if (NoTypeMatch)
15388 return false;
15389 // The Load's Base Ptr must also match
15390 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15391 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15392 if (LoadVT != OtherLd->getMemoryVT())
15393 return false;
15394 // Loads must only have one use.
15395 if (!OtherLd->hasNUsesOfValue(1, 0))
15396 return false;
15397 // The memory operands must not be volatile/indexed.
15398 if (OtherLd->isVolatile() || OtherLd->isIndexed())
15399 return false;
15400 // Don't mix temporal loads with non-temporal loads.
15401 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15402 return false;
15403 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15404 return false;
15405 } else
15406 return false;
15407 }
15408 if (IsConstantSrc) {
15409 if (NoTypeMatch)
15410 return false;
15411 if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15412 return false;
15413 }
15414 if (IsExtractVecSrc) {
15415 // Do not merge truncated stores here.
15416 if (Other->isTruncatingStore())
15417 return false;
15418 if (!MemVT.bitsEq(OtherBC.getValueType()))
15419 return false;
15420 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15421 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15422 return false;
15423 }
15424 Ptr = BaseIndexOffset::match(Other, DAG);
15425 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15426 };
15427
15428 // We looking for a root node which is an ancestor to all mergable
15429 // stores. We search up through a load, to our root and then down
15430 // through all children. For instance we will find Store{1,2,3} if
15431 // St is Store1, Store2. or Store3 where the root is not a load
15432 // which always true for nonvolatile ops. TODO: Expand
15433 // the search to find all valid candidates through multiple layers of loads.
15434 //
15435 // Root
15436 // |-------|-------|
15437 // Load Load Store3
15438 // | |
15439 // Store1 Store2
15440 //
15441 // FIXME: We should be able to climb and
15442 // descend TokenFactors to find candidates as well.
15443
15444 RootNode = St->getChain().getNode();
15445
15446 unsigned NumNodesExplored = 0;
15447 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15448 RootNode = Ldn->getChain().getNode();
15449 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15450 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15451 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15452 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15453 if (I2.getOperandNo() == 0)
15454 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15455 BaseIndexOffset Ptr;
15456 int64_t PtrDiff;
15457 if (CandidateMatch(OtherST, Ptr, PtrDiff))
15458 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15459 }
15460 } else
15461 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15462 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15463 if (I.getOperandNo() == 0)
15464 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15465 BaseIndexOffset Ptr;
15466 int64_t PtrDiff;
15467 if (CandidateMatch(OtherST, Ptr, PtrDiff))
15468 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15469 }
15470}
15471
15472// We need to check that merging these stores does not cause a loop in
15473// the DAG. Any store candidate may depend on another candidate
15474// indirectly through its operand (we already consider dependencies
15475// through the chain). Check in parallel by searching up from
15476// non-chain operands of candidates.
//
// StoreNodes - the candidate list; only the first NumStores entries are
//              examined.
// NumStores  - number of leading candidates that would be merged.
// RootNode   - common chain ancestor of the candidates; it (and anything
//              reached from it through TokenFactor operands) is pre-marked
//              as visited so the search does not go past it.
//
// Returns false as soon as hasPredecessorHelper reports true for one of the
// examined stores, and true if it reports false for all of them.
15477bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15478 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15479 SDNode *RootNode) {
15480 // FIXME: We should be able to truncate a full search of
15481 // predecessors by doing a BFS and keeping tabs on the originating
15482 // stores from which worklist nodes come from in a similar way to
15483 // TokenFactor simplification.
15484
// NOTE(review): listing lines 15485-15486 are absent from this copy. Visited
// and Worklist are used below without a visible declaration, so presumably
// they are declared here -- confirm against the upstream source.
15487
15488 // RootNode is a predecessor to all candidates so we need not search
15489 // past it. Add RootNode (peeking through TokenFactors). Do not count
15490 // these towards size check.
15491
15492 Worklist.push_back(RootNode);
15493 while (!Worklist.empty()) {
15494 auto N = Worklist.pop_back_val();
15495 if (!Visited.insert(N).second)
15496 continue; // Already present in Visited.
15497 if (N->getOpcode() == ISD::TokenFactor) {
15498 for (SDValue Op : N->ops())
15499 Worklist.push_back(Op.getNode());
15500 }
15501 }
15502
15503 // Don't count pruning nodes towards max.
// The budget is 1024 nodes on top of whatever the pruning pass above
// already placed in Visited.
15504 unsigned int Max = 1024 + Visited.size();
15505 // Search Ops of store candidates.
15506 for (unsigned i = 0; i < NumStores; ++i) {
15507 SDNode *N = StoreNodes[i].MemNode;
15508 // Of the 4 Store Operands:
15509 // * Chain (Op 0) -> We have already considered these
15510 // in candidate selection and can be
15511 // safely ignored
15512 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15513 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15514 // but aren't necessarily from the same base node, so
15515 // cycles possible (e.g. via indexed store).
15516 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15517 // non-indexed stores). Not constant on all targets (e.g. ARM)
15518 // and so can participate in a cycle.
// Seed the search with every non-chain operand (index 1 and up).
15519 for (unsigned j = 1; j < N->getNumOperands(); ++j)
15520 Worklist.push_back(N->getOperand(j).getNode());
15521 }
15522 // Search through DAG. We can stop early if we find a store node.
// Visited and Worklist are shared across the calls, so each call continues
// from the state the previous one left behind.
15523 for (unsigned i = 0; i < NumStores; ++i)
15524 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15525 Max))
15526 return false;
15527 return true;
15528}
15529
// Try to merge St with other stores to consecutive addresses into fewer,
// wider stores. Candidates are collected with getStoreMergeCandidates and
// sorted by offset from the base pointer. Depending on what St stores, one
// of three sections below is used:
//   * constants                  -> one wide integer or vector store,
//   * extracted vector elements
//     or subvectors              -> one vector store,
//   * loaded values              -> one wide load feeding one wide store.
//
// Returns true if at least one group of stores was merged. Returns false
// when nothing was merged, including the early exits below: OptLevel is
// None, twice the size of MemVT exceeds MaximumLegalStoreInBits, MemVT is
// not byte-sized or not simple, the stored value is none of the three kinds
// above, or fewer than two candidates were found.
15530bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15531 if (OptLevel == CodeGenOpt::None)
15532 return false;
15533
15534 EVT MemVT = St->getMemoryVT();
15535 int64_t ElementSizeBytes = MemVT.getStoreSize();
15536 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15537
// Merging even two of these stores would exceed the largest legal store.
15538 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15539 return false;
15540
15541 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15542 Attribute::NoImplicitFloat);
15543
15544 // This function cannot currently deal with non-byte-sized memory sizes.
15545 if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15546 return false;
15547
15548 if (!MemVT.isSimple())
15549 return false;
15550
15551 // Perform an early exit check. Do not bother looking at stored values that
15552 // are not constants, loads, or extracted vector elements.
15553 SDValue StoredVal = peekThroughBitcasts(St->getValue());
15554 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15555 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15556 isa<ConstantFPSDNode>(StoredVal);
15557 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15558 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15559 bool IsNonTemporalStore = St->isNonTemporal();
15560 bool IsNonTemporalLoad =
15561 IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15562
15563 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15564 return false;
15565
15566 SmallVector<MemOpLink, 8> StoreNodes;
15567 SDNode *RootNode;
15568 // Find potential store merge candidates by searching through chain sub-DAG
15569 getStoreMergeCandidates(St, StoreNodes, RootNode);
15570
15571 // Check if there is anything to merge.
15572 if (StoreNodes.size() < 2)
15573 return false;
15574
15575 // Sort the memory operands according to their distance from the
15576 // base pointer.
15577 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15578 return LHS.OffsetFromBase < RHS.OffsetFromBase;
15579 });
15580
15581 // Store Merge attempts to merge the lowest stores. This generally
15582 // works out as if successful, as the remaining stores are checked
15583 // after the first collection of stores is merged. However, in the
15584 // case that a non-mergeable store is found first, e.g., {p[-2],
15585 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15586 // mergeable cases. To prevent this, we prune such stores from the
15587 // front of StoreNodes here.
15588
// RV accumulates whether any merge has happened so far; it is the value
// returned from every exit below this point.
15589 bool RV = false;
15590 while (StoreNodes.size() > 1) {
// Advance StartIdx to the first store whose successor sits exactly one
// element further along.
15591 unsigned StartIdx = 0;
15592 while ((StartIdx + 1 < StoreNodes.size()) &&
15593 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15594 StoreNodes[StartIdx + 1].OffsetFromBase)
15595 ++StartIdx;
15596
15597 // Bail if we don't have enough candidates to merge.
15598 if (StartIdx + 1 >= StoreNodes.size())
15599 return RV;
15600
15601 if (StartIdx)
15602 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15603
15604 // Scan the memory operations on the chain and find the first
15605 // non-consecutive store memory address.
15606 unsigned NumConsecutiveStores = 1;
15607 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15608 // Check that the addresses are consecutive starting from the second
15609 // element in the list of stores.
15610 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15611 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15612 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15613 break;
15614 NumConsecutiveStores = i + 1;
15615 }
15616
15617 if (NumConsecutiveStores < 2) {
15618 StoreNodes.erase(StoreNodes.begin(),
15619 StoreNodes.begin() + NumConsecutiveStores);
15620 continue;
15621 }
15622
15623 // The node with the lowest store address.
15624 LLVMContext &Context = *DAG.getContext();
15625 const DataLayout &DL = DAG.getDataLayout();
15626
15627 // Store the constants into memory as one consecutive store.
15628 if (IsConstantSrc) {
15629 while (NumConsecutiveStores >= 2) {
15630 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15631 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15632 unsigned FirstStoreAlign = FirstInChain->getAlignment();
// The LastLegal* values are element counts (i + 1), so 1 means that no
// merged type was accepted.
15633 unsigned LastLegalType = 1;
15634 unsigned LastLegalVectorType = 1;
15635 bool LastIntegerTrunc = false;
15636 bool NonZero = false;
// Index of the first zero constant that follows a non-zero one;
// NumConsecutiveStores serves as the "none seen" sentinel.
15637 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15638 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15639 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15640 SDValue StoredVal = ST->getValue();
15641 bool IsElementZero = false;
15642 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15643 IsElementZero = C->isNullValue();
15644 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15645 IsElementZero = C->getConstantFPValue()->isNullValue();
15646 if (IsElementZero) {
15647 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15648 FirstZeroAfterNonZero = i;
15649 }
15650 NonZero |= !IsElementZero;
15651
15652 // Find a legal type for the constant store.
15653 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15654 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15655 bool IsFast = false;
15656
15657 // Break early when size is too large to be legal.
15658 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15659 break;
15660
15661 if (TLI.isTypeLegal(StoreTy) &&
15662 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15663 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15664 *FirstInChain->getMemOperand(), &IsFast) &&
15665 IsFast) {
15666 LastIntegerTrunc = false;
15667 LastLegalType = i + 1;
15668 // Or check whether a truncstore is legal.
15669 } else if (TLI.getTypeAction(Context, StoreTy) ==
// NOTE(review): listing line 15670 is absent from this copy; the right-hand
// side of the comparison and the opening brace are not visible.
15671 EVT LegalizedStoredValTy =
15672 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15673 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15674 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15675 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15676 *FirstInChain->getMemOperand(),
15677 &IsFast) &&
15678 IsFast) {
15679 LastIntegerTrunc = true;
15680 LastLegalType = i + 1;
15681 }
15682 }
15683
15684 // We only use vectors if the constant is known to be zero or the
15685 // target allows it and the function is not marked with the
15686 // noimplicitfloat attribute.
15687 if ((!NonZero ||
15688 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15689 !NoVectors) {
15690 // Find a legal type for the vector store.
15691 unsigned Elts = (i + 1) * NumMemElts;
15692 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15693 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15694 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
// NOTE(review): listing line 15695 is absent from this copy; the start of
// the call whose arguments follow is not visible.
15696 Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15697 IsFast)
15698 LastLegalVectorType = i + 1;
15699 }
15700 }
15701
15702 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15703 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15704
15705 // Check if we found a legal integer type that creates a meaningful
15706 // merge.
15707 if (NumElem < 2) {
15708 // We know that candidate stores are in order and of correct
15709 // shape. While there is no mergeable sequence from the
15710 // beginning one may start later in the sequence. The only
15711 // reason a merge of size N could have failed where another of
15712 // the same size would not have, is if the alignment has
15713 // improved or we've dropped a non-zero value. Drop as many
15714 // candidates as we can here.
15715 unsigned NumSkip = 1;
15716 while (
15717 (NumSkip < NumConsecutiveStores) &&
15718 (NumSkip < FirstZeroAfterNonZero) &&
15719 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15720 NumSkip++;
15721
15722 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15723 NumConsecutiveStores -= NumSkip;
15724 continue;
15725 }
15726
15727 // Check that we can merge these candidates without causing a cycle.
15728 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15729 RootNode)) {
15730 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15731 NumConsecutiveStores -= NumElem;
15732 continue;
15733 }
15734
15735 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15736 UseVector, LastIntegerTrunc);
15737
15738 // Remove merged stores for next iteration.
15739 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15740 NumConsecutiveStores -= NumElem;
15741 }
15742 continue;
15743 }
15744
15745 // When extracting multiple vector elements, try to store them
15746 // in one vector store rather than a sequence of scalar stores.
15747 if (IsExtractVecSrc) {
15748 // Loop on Consecutive Stores on success.
15749 while (NumConsecutiveStores >= 2) {
15750 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15751 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15752 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15753 unsigned NumStoresToMerge = 1;
15754 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15755 // Find a legal type for the vector store.
15756 unsigned Elts = (i + 1) * NumMemElts;
15757 EVT Ty =
15758 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15759 bool IsFast;
15760
15761 // Break early when size is too large to be legal.
15762 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15763 break;
15764
15765 if (TLI.isTypeLegal(Ty) &&
15766 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15767 TLI.allowsMemoryAccess(Context, DL, Ty,
15768 *FirstInChain->getMemOperand(), &IsFast) &&
15769 IsFast)
15770 NumStoresToMerge = i + 1;
15771 }
15772
15773 // Check if we found a legal integer type creating a meaningful
15774 // merge.
15775 if (NumStoresToMerge < 2) {
15776 // We know that candidate stores are in order and of correct
15777 // shape. While there is no mergeable sequence from the
15778 // beginning one may start later in the sequence. The only
15779 // reason a merge of size N could have failed where another of
15780 // the same size would not have, is if the alignment has
15781 // improved. Drop as many candidates as we can here.
15782 unsigned NumSkip = 1;
15783 while (
15784 (NumSkip < NumConsecutiveStores) &&
15785 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15786 NumSkip++;
15787
15788 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15789 NumConsecutiveStores -= NumSkip;
15790 continue;
15791 }
15792
15793 // Check that we can merge these candidates without causing a cycle.
15794 if (!checkMergeStoreCandidatesForDependencies(
15795 StoreNodes, NumStoresToMerge, RootNode)) {
15796 StoreNodes.erase(StoreNodes.begin(),
15797 StoreNodes.begin() + NumStoresToMerge);
15798 NumConsecutiveStores -= NumStoresToMerge;
15799 continue;
15800 }
15801
15802 RV |= MergeStoresOfConstantsOrVecElts(
15803 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15804
15805 StoreNodes.erase(StoreNodes.begin(),
15806 StoreNodes.begin() + NumStoresToMerge);
15807 NumConsecutiveStores -= NumStoresToMerge;
15808 }
15809 continue;
15810 }
15811
15812 // Below we handle the case of multiple consecutive stores that
15813 // come from multiple consecutive loads. We merge them into a single
15814 // wide load and a single wide store.
15815
15816 // Look for load nodes which are used by the stored values.
15817 SmallVector<MemOpLink, 8> LoadNodes;
15818
15819 // Find acceptable loads. Loads need to have the same chain (token factor),
15820 // must not be zext, volatile, indexed, and they must be consecutive.
15821 BaseIndexOffset LdBasePtr;
15822
15823 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15824 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// NOTE(review): listing line 15825 is absent from this copy; Val is used on
// the next line without a visible definition.
15826 LoadSDNode *Ld = cast<LoadSDNode>(Val);
15827
15828 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15829 // If this is not the first ptr that we check.
15830 int64_t LdOffset = 0;
15831 if (LdBasePtr.getBase().getNode()) {
15832 // The base ptr must be the same.
15833 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15834 break;
15835 } else {
15836 // Check that all other base pointers are the same as this one.
15837 LdBasePtr = LdPtr;
15838 }
15839
15840 // We found a potential memory operand to merge.
15841 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15842 }
15843
15844 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15845 // If we have load/store pair instructions and we only have two values,
15846 // don't bother merging.
15847 unsigned RequiredAlignment;
15848 if (LoadNodes.size() == 2 &&
15849 TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15850 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15851 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15852 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15853 break;
15854 }
15855 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15856 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15857 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15858 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15859 unsigned FirstLoadAlign = FirstLoad->getAlignment();
15860
15861 // Scan the memory operations on the chain and find the first
15862 // non-consecutive load memory address. These variables hold the index in
15863 // the store node array.
15864
15865 unsigned LastConsecutiveLoad = 1;
15866
15867 // This variable refers to the size and not index in the array.
15868 unsigned LastLegalVectorType = 1;
15869 unsigned LastLegalIntegerType = 1;
15870 bool isDereferenceable = true;
15871 bool DoIntegerTruncate = false;
15872 StartAddress = LoadNodes[0].OffsetFromBase;
15873 SDValue FirstChain = FirstLoad->getChain();
15874 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15875 // All loads must share the same chain.
15876 if (LoadNodes[i].MemNode->getChain() != FirstChain)
15877 break;
15878
15879 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15880 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15881 break;
15882 LastConsecutiveLoad = i;
15883
15884 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15885 isDereferenceable = false;
15886
15887 // Find a legal type for the vector store.
15888 unsigned Elts = (i + 1) * NumMemElts;
15889 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15890
15891 // Break early when size is too large to be legal.
15892 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15893 break;
15894
// The merged type has to be acceptable (and fast) for both the first
// store's and the first load's memory operand.
15895 bool IsFastSt, IsFastLd;
15896 if (TLI.isTypeLegal(StoreTy) &&
15897 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15898 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15899 *FirstInChain->getMemOperand(), &IsFastSt) &&
15900 IsFastSt &&
15901 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15902 *FirstLoad->getMemOperand(), &IsFastLd) &&
15903 IsFastLd) {
15904 LastLegalVectorType = i + 1;
15905 }
15906
15907 // Find a legal type for the integer store.
15908 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15909 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15910 if (TLI.isTypeLegal(StoreTy) &&
15911 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15912 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15913 *FirstInChain->getMemOperand(), &IsFastSt) &&
15914 IsFastSt &&
15915 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15916 *FirstLoad->getMemOperand(), &IsFastLd) &&
15917 IsFastLd) {
15918 LastLegalIntegerType = i + 1;
15919 DoIntegerTruncate = false;
15920 // Or check whether a truncstore and extload is legal.
15921 } else if (TLI.getTypeAction(Context, StoreTy) ==
// NOTE(review): listing line 15922 is absent from this copy; the right-hand
// side of the comparison and the opening brace are not visible.
15923 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15924 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15925 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15926 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15927 StoreTy) &&
15928 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15929 StoreTy) &&
15930 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15931 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15932 *FirstInChain->getMemOperand(),
15933 &IsFastSt) &&
15934 IsFastSt &&
15935 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15936 *FirstLoad->getMemOperand(), &IsFastLd) &&
15937 IsFastLd) {
15938 LastLegalIntegerType = i + 1;
15939 DoIntegerTruncate = true;
15940 }
15941 }
15942 }
15943
15944 // Only use vector types if the vector type is larger than the integer
15945 // type. If they are the same, use integers.
15946 bool UseVectorTy =
15947 LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15948 unsigned LastLegalType =
15949 std::max(LastLegalVectorType, LastLegalIntegerType);
15950
15951 // We add +1 here because the LastXXX variables refer to location while
15952 // the NumElem refers to array/index size.
15953 unsigned NumElem =
15954 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15955 NumElem = std::min(LastLegalType, NumElem);
15956
15957 if (NumElem < 2) {
15958 // We know that candidate stores are in order and of correct
15959 // shape. While there is no mergeable sequence from the
15960 // beginning one may start later in the sequence. The only
15961 // reason a merge of size N could have failed where another of
15962 // the same size would not have is if the alignment or either
15963 // the load or store has improved. Drop as many candidates as we
15964 // can here.
15965 unsigned NumSkip = 1;
15966 while ((NumSkip < LoadNodes.size()) &&
15967 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15968 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15969 NumSkip++;
15970 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15971 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15972 NumConsecutiveStores -= NumSkip;
15973 continue;
15974 }
15975
15976 // Check that we can merge these candidates without causing a cycle.
15977 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15978 RootNode)) {
15979 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15980 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15981 NumConsecutiveStores -= NumElem;
15982 continue;
15983 }
15984
15985 // Find if it is better to use vectors or integers to load and store
15986 // to memory.
15987 EVT JointMemOpVT;
15988 if (UseVectorTy) {
15989 // Find a legal type for the vector store.
15990 unsigned Elts = NumElem * NumMemElts;
15991 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15992 } else {
15993 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15994 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15995 }
15996
15997 SDLoc LoadDL(LoadNodes[0].MemNode);
15998 SDLoc StoreDL(StoreNodes[0].MemNode);
15999
16000 // The merged loads are required to have the same incoming chain, so
16001 // using the first's chain is acceptable.
16002
16003 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16004 AddToWorklist(NewStoreChain.getNode());
16005
// NOTE(review): listing lines 16008, 16010-16011 and 16014 are absent from
// this copy, so the alternative branches of the two flag expressions and
// the body of the IsNonTemporalLoad check are not visible.
16006 MachineMemOperand::Flags LdMMOFlags =
16007 isDereferenceable ? MachineMemOperand::MODereferenceable
16009 if (IsNonTemporalLoad)
16011
16012 MachineMemOperand::Flags StMMOFlags =
16013 IsNonTemporalStore ? MachineMemOperand::MONonTemporal
16015
16016 SDValue NewLoad, NewStore;
16017 if (UseVectorTy || !DoIntegerTruncate) {
16018 NewLoad =
16019 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16020 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16021 FirstLoadAlign, LdMMOFlags);
16022 NewStore = DAG.getStore(
16023 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16024 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16025 } else { // This must be the truncstore/extload case
16026 EVT ExtendedTy =
16027 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16028 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16029 FirstLoad->getChain(), FirstLoad->getBasePtr(),
16030 FirstLoad->getPointerInfo(), JointMemOpVT,
16031 FirstLoadAlign, LdMMOFlags);
// NOTE(review): unlike the branch above, this store takes its flags from
// the first store's memory operand rather than from StMMOFlags -- confirm
// that the difference is intended.
16032 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16033 FirstInChain->getBasePtr(),
16034 FirstInChain->getPointerInfo(),
16035 JointMemOpVT, FirstInChain->getAlignment(),
16036 FirstInChain->getMemOperand()->getFlags());
16037 }
16038
16039 // Transfer chain users from old loads to the new load.
16040 for (unsigned i = 0; i < NumElem; ++i) {
16041 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
// NOTE(review): listing line 16042 is absent from this copy; the start of
// the call that takes the new load's chain result is not visible.
16043 SDValue(NewLoad.getNode(), 1));
16044 }
16045
16046 // Replace all the stores with the new store. Recursively remove the
16047 // corresponding value if it's no longer used.
16048 for (unsigned i = 0; i < NumElem; ++i) {
16049 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16050 CombineTo(StoreNodes[i].MemNode, NewStore);
16051 if (Val.getNode()->use_empty())
16052 recursivelyDeleteUnusedNodes(Val.getNode());
16053 }
16054
16055 RV = true;
16056 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16057 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16058 NumConsecutiveStores -= NumElem;
16059 }
16060 }
16061 return RV;
16062}
16063
16064SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16065 SDLoc SL(ST);
16066 SDValue ReplStore;
16067
16068 // Replace the chain to avoid dependency.
16069 if (ST->isTruncatingStore()) {
16070 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16071 ST->getBasePtr(), ST->getMemoryVT(),
16072 ST->getMemOperand());
16073 } else {
16074 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16075 ST->getMemOperand());
16076 }
16077
16078 // Create token to keep both nodes around.
16079 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16080 MVT::Other, ST->getChain(), ReplStore);
16081
16082 // Make sure the new and old chains are cleaned up.
16083 AddToWorklist(Token.getNode());
16084
16085 // Don't add users to work list.
16086 return CombineTo(ST, Token, false);
16087}
16088
// Rewrite a store of a floating-point constant as a store of the integer
// constant holding the same bit pattern.
//
// Returns the replacement: a single i32 or i64 store, or for the split f64
// case a TokenFactor of two i32 stores. Returns an empty SDValue when the
// stored value is a TargetConstantFP, when the type is f16, f80, f128 or
// ppcf128, or when none of the guarding conditions of the f32/f64 cases
// holds.
//
// The stored value is cast<> to ConstantFPSDNode unconditionally, so ST is
// presumably only passed in when it stores an FP constant -- verify against
// the caller.
16089SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16090 SDValue Value = ST->getValue();
16091 if (Value.getOpcode() == ISD::TargetConstantFP)
16092 return SDValue();
16093
16094 SDLoc DL(ST);
16095
16096 SDValue Chain = ST->getChain();
16097 SDValue Ptr = ST->getBasePtr();
16098
16099 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16100
16101 // NOTE: If the original store is volatile, this transform must not increase
16102 // the number of stores. For example, on x86-32 an f64 can be stored in one
16103 // processor operation but an i64 (which is not legal) requires two. So the
16104 // transform should not be done in this case.
16105
16106 SDValue Tmp;
16107 switch (CFP->getSimpleValueType(0).SimpleTy) {
16108 default:
16109 llvm_unreachable("Unknown FP type");
16110 case MVT::f16: // We don't do this for these yet.
16111 case MVT::f80:
16112 case MVT::f128:
16113 case MVT::ppcf128:
16114 return SDValue();
16115 case MVT::f32:
16116 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
// NOTE(review): listing line 16117 is absent from this copy; the second
// operand of the || and the opening brace are not visible. The lone ';'
// on the next line is an empty statement.
16118 ;
// Store the low 32 bits of the constant's bit pattern as an i32.
16119 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16120 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16121 MVT::i32);
16122 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16123 }
16124
16125 return SDValue();
16126 case MVT::f64:
16127 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16128 !ST->isVolatile()) ||
// NOTE(review): listing line 16129 is absent from this copy; the second
// operand of the || and the opening brace are not visible. The lone ';'
// on the next line is an empty statement.
16130 ;
16131 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16132 getZExtValue(), SDLoc(CFP), MVT::i64);
16133 return DAG.getStore(Chain, DL, Tmp,
16134 Ptr, ST->getMemOperand());
16135 }
16136
16137 if (!ST->isVolatile() &&
// NOTE(review): listing line 16138 is absent from this copy; the rest of
// this condition and the opening brace are not visible.
16139 // Many FP stores are not made apparent until after legalize, e.g. for
16140 // argument passing. Since this is so common, custom legalize the
16141 // 64-bit integer store into two 32-bit stores.
16142 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16143 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16144 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
// On big-endian targets the high half goes to the lower address.
16145 if (DAG.getDataLayout().isBigEndian())
16146 std::swap(Lo, Hi);
16147
16148 unsigned Alignment = ST->getAlignment();
16149 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16150 AAMDNodes AAInfo = ST->getAAInfo();
16151
// First half at the original address with the original alignment.
16152 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16153 ST->getAlignment(), MMOFlags, AAInfo);
// Second half 4 bytes further on, with the alignment reduced by
// MinAlign(Alignment, 4U).
16154 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16155 DAG.getConstant(4, DL, Ptr.getValueType()));
16156 Alignment = MinAlign(Alignment, 4U);
16157 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16158 ST->getPointerInfo().getWithOffset(4),
16159 Alignment, MMOFlags, AAInfo);
// Both halves hang off the same incoming chain and are joined here.
16160 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16161 St0, St1);
16162 }
16163
16164 return SDValue();
16165 }
16166}
16167
16168SDValue DAGCombiner::visitSTORE(SDNode *N) {
16169 StoreSDNode *ST = cast<StoreSDNode>(N);
16170 SDValue Chain = ST->getChain();
16171 SDValue Value = ST->getValue();
16172 SDValue Ptr = ST->getBasePtr();
16173
16174 // If this is a store of a bit convert, store the input value if the
16175 // resultant store does not need a higher alignment than the original.
16176 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16177 ST->isUnindexed()) {
16178 EVT SVT = Value.getOperand(0).getValueType();
16179 // If the store is volatile, we only want to change the store type if the
16180 // resulting store is legal. Otherwise we might increase the number of
16181 // memory accesses. We don't care if the original type was legal or not
16182 // as we assume software couldn't rely on the number of accesses of an
16183 // illegal type.
16184 if (((!LegalOperations && !ST->isVolatile()) ||
16185 TLI.isOperationLegal(ISD::STORE, SVT)) &&
16186 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
16187 DAG, *ST->getMemOperand())) {
16188 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
16189 ST->getPointerInfo(), ST->getAlignment(),
16190 ST->getMemOperand()->getFlags(), ST->getAAInfo());
16191 }
16192 }
16193
16194 // Turn 'store undef, Ptr' -> nothing.
16195 if (Value.isUndef() && ST->isUnindexed())
16196 return Chain;
16197
16198 // Try to infer better alignment information than the store already has.
16199 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
16200 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
16201 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
16202 SDValue NewStore =
16203 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
16204 ST->getMemoryVT(), Align,
16205 ST->getMemOperand()->getFlags(), ST->getAAInfo());
16206 // NewStore will always be N as we are only refining the alignment
16207 assert(NewStore.getNode() == N);
16208 (void)NewStore;
16209 }
16210 }
16211 }
16212
16213 // Try transforming a pair floating point load / store ops to integer
16214 // load / store ops.
16215 if (SDValue NewST = TransformFPLoadStorePair(N))
16216 return NewST;
16217
16218 // Try transforming several stores into STORE (BSWAP).
16219 if (SDValue Store = MatchStoreCombine(ST))
16220 return Store;
16221
16222 if (ST->isUnindexed()) {
16223 // Walk up chain skipping non-aliasing memory nodes, on this store and any
16224 // adjacent stores.
16225 if (findBetterNeighborChains(ST)) {
16226 // replaceStoreChain uses CombineTo, which handled all of the worklist
16227 // manipulation. Return the original node to not do anything else.
16228 return SDValue(ST, 0);
16229 }
16230 Chain = ST->getChain();
16231 }
16232
16233 // FIXME: is there such a thing as a truncating indexed store?
16234 if (ST->isTruncatingStore() && ST->isUnindexed() &&
16235 Value.getValueType().isInteger() &&
16236 (!isa<ConstantSDNode>(Value) ||
16237 !cast<ConstantSDNode>(Value)->isOpaque())) {
16238 APInt TruncDemandedBits =
16239 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
16240 ST->getMemoryVT().getScalarSizeInBits());
16241
16242 // See if we can simplify the input to this truncstore with knowledge that
16243 // only the low bits are being used. For example:
16244 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
16245 SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
16246 AddToWorklist(Value.getNode());
16247 if (Shorter)
16248 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
16249 ST->getMemOperand());
16250
16251 // Otherwise, see if we can simplify the operation with
16252 // SimplifyDemandedBits, which only works if the value has a single use.
16253 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
16254 // Re-visit the store if anything changed and the store hasn't been merged
16255 // with another node (N is deleted) SimplifyDemandedBits will add Value's
16256 // node back to the worklist if necessary, but we also need to re-visit
16257 // the Store node itself.
16258 if (N->getOpcode() != ISD::DELETED_NODE)
16259 AddToWorklist(N);
16260 return SDValue(N, 0);
16261 }
16262 }
16263
16264 // If this is a load followed by a store to the same location, then the store
16265 // is dead/noop.
16266 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
16267 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
16268 ST->isUnindexed() && !ST->isVolatile() &&
16269 // There can't be any side effects between the load and store, such as
16270 // a call or store.
16272 // The store is dead, remove it.
16273 return Chain;
16274 }
16275 }
16276
16277 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
16278 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
16279 !ST1->isVolatile()) {
16280 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
16281 ST->getMemoryVT() == ST1->getMemoryVT()) {
16282 // If this is a store followed by a store with the same value to the
16283 // same location, then the store is dead/noop.
16284 return Chain;
16285 }
16286
16287 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
16288 !ST1->getBasePtr().isUndef()) {
16289 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
16290 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
16291 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
16292 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
16293 // If this is a store who's preceding store to a subset of the current
16294 // location and no one other node is chained to that store we can
16295 // effectively drop the store. Do not remove stores to undef as they may
16296 // be used as data sinks.
16297 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
16298 CombineTo(ST1, ST1->getChain());
16299 return SDValue();
16300 }
16301
16302 // If ST stores to a subset of preceding store's write set, we may be
16303 // able to fold ST's value into the preceding stored value. As we know
16304 // the other uses of ST1's chain are unconcerned with ST, this folding
16305 // will not affect those nodes.
16306 int64_t BitOffset;
16307 if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
16308 BitOffset)) {
16309 SDValue ChainValue = ST1->getValue();
16310 if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
16311 if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
16312 APInt Val = C1->getAPIntValue();
16313 APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
16314 // FIXME: Handle Big-endian mode.
16315 if (!DAG.getDataLayout().isBigEndian()) {
16316 Val.insertBits(InsertVal, BitOffset);
16317 SDValue NewSDVal =
16318 DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
16319 C1->isTargetOpcode(), C1->isOpaque());
16320 SDNode *NewST1 = DAG.UpdateNodeOperands(
16321 ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
16322 ST1->getOperand(3));
16323 return CombineTo(ST, SDValue(NewST1, 0));
16324 }
16325 }
16326 }
16327 } // End ST subset of ST1 case.
16328 }
16329 }
16330 }
16331
16332 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
16333 // truncating store. We can do this even if this is already a truncstore.
16334 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
16335 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
16336 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
16337 ST->getMemoryVT())) {
16338 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
16339 Ptr, ST->getMemoryVT(), ST->getMemOperand());
16340 }
16341
16342 // Always perform this optimization before types are legal. If the target
16343 // prefers, also try this after legalization to catch stores that were created
16344 // by intrinsics or other nodes.
16345 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
16346 while (true) {
16347 // There can be multiple store sequences on the same chain.
16348 // Keep trying to merge store sequences until we are unable to do so
16349 // or until we merge the last store on the chain.
16350 bool Changed = MergeConsecutiveStores(ST);
16351 if (!Changed) break;
16352 // Return N as merge only uses CombineTo and no worklist clean
16353 // up is necessary.
16354 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
16355 return SDValue(N, 0);
16356 }
16357 }
16358
16359 // Try transforming N to an indexed store.
16360 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16361 return SDValue(N, 0);
16362
16363 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
16364 //
16365 // Make sure to do this only after attempting to merge stores in order to
16366 // avoid changing the types of some subset of stores due to visit order,
16367 // preventing their merging.
16368 if (isa<ConstantFPSDNode>(ST->getValue())) {
16369 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
16370 return NewSt;
16371 }
16372
16373 if (SDValue NewSt = splitMergedValStore(ST))
16374 return NewSt;
16375
16376 return ReduceLoadOpStoreWidth(N);
16377}
16378
16379SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16380 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16381 if (!LifetimeEnd->hasOffset())
16382 return SDValue();
16383
16384 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16385 LifetimeEnd->getOffset(), false);
16386
16387 // We walk up the chains to find stores.
16388 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16389 while (!Chains.empty()) {
16390 SDValue Chain = Chains.back();
16391 Chains.pop_back();
16392 if (!Chain.hasOneUse())
16393 continue;
16394 switch (Chain.getOpcode()) {
16395 case ISD::TokenFactor:
16396 for (unsigned Nops = Chain.getNumOperands(); Nops;)
16397 Chains.push_back(Chain.getOperand(--Nops));
16398 break;
16400 case ISD::LIFETIME_END:
16401 // We can forward past any lifetime start/end that can be proven not to
16402 // alias the node.
16403 if (!isAlias(Chain.getNode(), N))
16404 Chains.push_back(Chain.getOperand(0));
16405 break;
16406 case ISD::STORE: {
16407 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16408 if (ST->isVolatile() || ST->isIndexed())
16409 continue;
16410 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16411 // If we store purely within object bounds just before its lifetime ends,
16412 // we can remove the store.
16413 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16414 ST->getMemoryVT().getStoreSizeInBits())) {
16415 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16416 dbgs() << "\nwithin LIFETIME_END of : ";
16417 LifetimeEndBase.dump(); dbgs() << "\n");
16418 CombineTo(ST, ST->getChain());
16419 return SDValue(N, 0);
16420 }
16421 }
16422 }
16423 }
16424 return SDValue();
16425}
16426
16427/// For the instruction sequence of store below, F and I values
16428/// are bundled together as an i64 value before being stored into memory.
16429/// Sometimes it is more efficent to generate separate stores for F and I,
16430/// which can remove the bitwise instructions or sink them to colder places.
16431///
16432/// (store (or (zext (bitcast F to i32) to i64),
16433/// (shl (zext I to i64), 32)), addr) -->
16434/// (store F, addr) and (store I, addr+4)
16435///
16436/// Similarly, splitting for other merged store can also be beneficial, like:
16437/// For pair of {i32, i32}, i64 store --> two i32 stores.
16438/// For pair of {i32, i16}, i64 store --> two i32 stores.
16439/// For pair of {i16, i16}, i32 store --> two i16 stores.
16440/// For pair of {i16, i8}, i32 store --> two i16 stores.
16441/// For pair of {i8, i8}, i16 store --> two i8 stores.
16442///
16443/// We allow each target to determine specifically which kind of splitting is
16444/// supported.
16445///
16446/// The store patterns are commonly seen from the simple code snippet below
16447/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16448/// void goo(const std::pair<int, float> &);
16449/// hoo() {
16450/// ...
16451/// goo(std::make_pair(tmp, ftmp));
16452/// ...
16453/// }
16454///
16456 if (OptLevel == CodeGenOpt::None)
16457 return SDValue();
16458
16459 SDValue Val = ST->getValue();
16460 SDLoc DL(ST);
16461
16462 // Match OR operand.
16463 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16464 return SDValue();
16465
16466 // Match SHL operand and get Lower and Higher parts of Val.
16467 SDValue Op1 = Val.getOperand(0);
16468 SDValue Op2 = Val.getOperand(1);
16469 SDValue Lo, Hi;
16470 if (Op1.getOpcode() != ISD::SHL) {
16471 std::swap(Op1, Op2);
16472 if (Op1.getOpcode() != ISD::SHL)
16473 return SDValue();
16474 }
16475 Lo = Op2;
16476 Hi = Op1.getOperand(0);
16477 if (!Op1.hasOneUse())
16478 return SDValue();
16479
16480 // Match shift amount to HalfValBitSize.
16481 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16482 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16483 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16484 return SDValue();
16485
16486 // Lo and Hi are zero-extended from int with size less equal than 32
16487 // to i64.
16488 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16489 !Lo.getOperand(0).getValueType().isScalarInteger() ||
16490 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16491 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16492 !Hi.getOperand(0).getValueType().isScalarInteger() ||
16493 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16494 return SDValue();
16495
16496 // Use the EVT of low and high parts before bitcast as the input
16497 // of target query.
16498 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16499 ? Lo.getOperand(0).getValueType()
16500 : Lo.getValueType();
16501 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16502 ? Hi.getOperand(0).getValueType()
16503 : Hi.getValueType();
16504 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16505 return SDValue();
16506
16507 // Start to split store.
16508 unsigned Alignment = ST->getAlignment();
16509 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16510 AAMDNodes AAInfo = ST->getAAInfo();
16511
16512 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16513 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16514 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16515 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16516
16517 SDValue Chain = ST->getChain();
16518 SDValue Ptr = ST->getBasePtr();
16519 // Lower value store.
16520 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16521 ST->getAlignment(), MMOFlags, AAInfo);
16522 Ptr =
16523 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16524 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16525 // Higher value store.
16526 SDValue St1 =
16527 DAG.getStore(St0, DL, Hi, Ptr,
16528 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16529 Alignment / 2, MMOFlags, AAInfo);
16530 return St1;
16531}
16532
16533/// Convert a disguised subvector insertion into a shuffle:
16534/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
16535/// bitcast(shuffle (bitcast V), (extended X), Mask)
16536/// Note: We do not use an insert_subvector node because that requires a legal
16537/// subvector type.
16538SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16539 SDValue InsertVal = N->getOperand(1);
16540 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16541 !InsertVal.getOperand(0).getValueType().isVector())
16542 return SDValue();
16543
16544 SDValue SubVec = InsertVal.getOperand(0);
16545 SDValue DestVec = N->getOperand(0);
16546 EVT SubVecVT = SubVec.getValueType();
16547 EVT VT = DestVec.getValueType();
16548 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16549 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16550 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16551
16552 // Step 1: Create a shuffle mask that implements this insert operation. The
16553 // vector that we are inserting into will be operand 0 of the shuffle, so
16554 // those elements are just 'i'. The inserted subvector is in the first
16555 // positions of operand 1 of the shuffle. Example:
16556 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16557 SmallVector<int, 16> Mask(NumMaskVals);
16558 for (unsigned i = 0; i != NumMaskVals; ++i) {
16559 if (i / NumSrcElts == InsIndex)
16560 Mask[i] = (i % NumSrcElts) + NumMaskVals;
16561 else
16562 Mask[i] = i;
16563 }
16564
16565 // Bail out if the target can not handle the shuffle we want to create.
16566 EVT SubVecEltVT = SubVecVT.getVectorElementType();
16567 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16568 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16569 return SDValue();
16570
16571 // Step 2: Create a wide vector from the inserted source vector by appending
16572 // undefined elements. This is the same size as our destination vector.
16573 SDLoc DL(N);
16574 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16575 ConcatOps[0] = SubVec;
16576 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16577
16578 // Step 3: Shuffle in the padded subvector.
16579 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16580 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16581 AddToWorklist(PaddedSubV.getNode());
16582 AddToWorklist(DestVecBC.getNode());
16583 AddToWorklist(Shuf.getNode());
16584 return DAG.getBitcast(VT, Shuf);
16585}
16586
16587SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16588 SDValue InVec = N->getOperand(0);
16589 SDValue InVal = N->getOperand(1);
16590 SDValue EltNo = N->getOperand(2);
16591 SDLoc DL(N);
16592
16593 // If the inserted element is an UNDEF, just use the input vector.
16594 if (InVal.isUndef())
16595 return InVec;
16596
16597 EVT VT = InVec.getValueType();
16598 unsigned NumElts = VT.getVectorNumElements();
16599
16600 // Remove redundant insertions:
16601 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16602 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16603 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16604 return InVec;
16605
16606 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16607 if (!IndexC) {
16608 // If this is variable insert to undef vector, it might be better to splat:
16609 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16610 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16611 SmallVector<SDValue, 8> Ops(NumElts, InVal);
16612 return DAG.getBuildVector(VT, DL, Ops);
16613 }
16614 return SDValue();
16615 }
16616
16617 // We must know which element is being inserted for folds below here.
16618 unsigned Elt = IndexC->getZExtValue();
16619 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16620 return Shuf;
16621
16622 // Canonicalize insert_vector_elt dag nodes.
16623 // Example:
16624 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16625 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16626 //
16627 // Do this only if the child insert_vector node has one use; also
16628 // do this only if indices are both constants and Idx1 < Idx0.
16629 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16630 && isa<ConstantSDNode>(InVec.getOperand(2))) {
16631 unsigned OtherElt = InVec.getConstantOperandVal(2);
16632 if (Elt < OtherElt) {
16633 // Swap nodes.
16634 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16635 InVec.getOperand(0), InVal, EltNo);
16636 AddToWorklist(NewOp.getNode());
16637 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16638 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16639 }
16640 }
16641
16642 // If we can't generate a legal BUILD_VECTOR, exit
16643 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16644 return SDValue();
16645
16646 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16647 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
16648 // vector elements.
16650 // Do not combine these two vectors if the output vector will not replace
16651 // the input vector.
16652 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16653 Ops.append(InVec.getNode()->op_begin(),
16654 InVec.getNode()->op_end());
16655 } else if (InVec.isUndef()) {
16656 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16657 } else {
16658 return SDValue();
16659 }
16660 assert(Ops.size() == NumElts && "Unexpected vector size");
16661
16662 // Insert the element
16663 if (Elt < Ops.size()) {
16664 // All the operands of BUILD_VECTOR must have the same type;
16665 // we enforce that here.
16666 EVT OpVT = Ops[0].getValueType();
16667 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16668 }
16669
16670 // Return the new vector
16671 return DAG.getBuildVector(VT, DL, Ops);
16672}
16673
16674SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16675 SDValue EltNo,
16676 LoadSDNode *OriginalLoad) {
16677 assert(!OriginalLoad->isVolatile());
16678
16679 EVT ResultVT = EVE->getValueType(0);
16680 EVT VecEltVT = InVecVT.getVectorElementType();
16681 unsigned Align = OriginalLoad->getAlignment();
16682 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16683 VecEltVT.getTypeForEVT(*DAG.getContext()));
16684
16685 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16686 return SDValue();
16687
16688 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16690 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16691 return SDValue();
16692
16693 Align = NewAlign;
16694
16695 SDValue NewPtr = OriginalLoad->getBasePtr();
16697 EVT PtrType = NewPtr.getValueType();
16699 SDLoc DL(EVE);
16700 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16701 int Elt = ConstEltNo->getZExtValue();
16702 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16703 Offset = DAG.getConstant(PtrOff, DL, PtrType);
16704 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16705 } else {
16706 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16707 Offset = DAG.getNode(
16708 ISD::MUL, DL, PtrType, Offset,
16709 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16710 // Discard the pointer info except the address space because the memory
16711 // operand can't represent this new access since the offset is variable.
16712 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16713 }
16714 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16715
16716 // The replacement we need to do here is a little tricky: we need to
16717 // replace an extractelement of a load with a load.
16718 // Use ReplaceAllUsesOfValuesWith to do the replacement.
16719 // Note that this replacement assumes that the extractvalue is the only
16720 // use of the load; that's okay because we don't want to perform this
16721 // transformation in other cases anyway.
16722 SDValue Load;
16723 SDValue Chain;
16724 if (ResultVT.bitsGT(VecEltVT)) {
16725 // If the result type of vextract is wider than the load, then issue an
16726 // extending load instead.
16727 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16728 VecEltVT)
16730 : ISD::EXTLOAD;
16731 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16732 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16733 Align, OriginalLoad->getMemOperand()->getFlags(),
16734 OriginalLoad->getAAInfo());
16735 Chain = Load.getValue(1);
16736 } else {
16737 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16738 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16739 OriginalLoad->getAAInfo());
16740 Chain = Load.getValue(1);
16741 if (ResultVT.bitsLT(VecEltVT))
16742 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16743 else
16744 Load = DAG.getBitcast(ResultVT, Load);
16745 }
16746 WorklistRemover DeadNodes(*this);
16747 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16748 SDValue To[] = { Load, Chain };
16749 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16750 // Since we're explicitly calling ReplaceAllUses, add the new node to the
16751 // worklist explicitly as well.
16752 AddToWorklist(Load.getNode());
16753 AddUsersToWorklist(Load.getNode()); // Add users too
16754 // Make sure to revisit this node to clean it up; it will usually be dead.
16755 AddToWorklist(EVE);
16756 ++OpsNarrowed;
16757 return SDValue(EVE, 0);
16758}
16759
16760/// Transform a vector binary operation into a scalar binary operation by moving
16761/// the math/logic after an extract element of a vector.
16763 bool LegalOperations) {
16764 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16765 SDValue Vec = ExtElt->getOperand(0);
16766 SDValue Index = ExtElt->getOperand(1);
16767 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16768 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16769 Vec.getNode()->getNumValues() != 1)
16770 return SDValue();
16771
16772 // Targets may want to avoid this to prevent an expensive register transfer.
16773 if (!TLI.shouldScalarizeBinop(Vec))
16774 return SDValue();
16775
16776 // Extracting an element of a vector constant is constant-folded, so this
16777 // transform is just replacing a vector op with a scalar op while moving the
16778 // extract.
16779 SDValue Op0 = Vec.getOperand(0);
16780 SDValue Op1 = Vec.getOperand(1);
16781 if (isAnyConstantBuildVector(Op0, true) ||
16782 isAnyConstantBuildVector(Op1, true)) {
16783 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16784 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16785 SDLoc DL(ExtElt);
16786 EVT VT = ExtElt->getValueType(0);
16787 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16788 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16789 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16790 }
16791
16792 return SDValue();
16793}
16794
16795SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16796 SDValue VecOp = N->getOperand(0);
16797 SDValue Index = N->getOperand(1);
16798 EVT ScalarVT = N->getValueType(0);
16799 EVT VecVT = VecOp.getValueType();
16800 if (VecOp.isUndef())
16801 return DAG.getUNDEF(ScalarVT);
16802
16803 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16804 //
16805 // This only really matters if the index is non-constant since other combines
16806 // on the constant elements already work.
16807 SDLoc DL(N);
16808 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16809 Index == VecOp.getOperand(2)) {
16810 SDValue Elt = VecOp.getOperand(1);
16811 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16812 }
16813
16814 // (vextract (scalar_to_vector val, 0) -> val
16815 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16816 // Check if the result type doesn't match the inserted element type. A
16817 // SCALAR_TO_VECTOR may truncate the inserted element and the
16818 // EXTRACT_VECTOR_ELT may widen the extracted vector.
16819 SDValue InOp = VecOp.getOperand(0);
16820 if (InOp.getValueType() != ScalarVT) {
16821 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16822 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16823 }
16824 return InOp;
16825 }
16826
16827 // extract_vector_elt of out-of-bounds element -> UNDEF
16828 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16829 unsigned NumElts = VecVT.getVectorNumElements();
16830 if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16831 return DAG.getUNDEF(ScalarVT);
16832
16833 // extract_vector_elt (build_vector x, y), 1 -> y
16834 if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16835 TLI.isTypeLegal(VecVT) &&
16836 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16837 SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16838 EVT InEltVT = Elt.getValueType();
16839
16840 // Sometimes build_vector's scalar input types do not match result type.
16841 if (ScalarVT == InEltVT)
16842 return Elt;
16843
16844 // TODO: It may be useful to truncate if free if the build_vector implicitly
16845 // converts.
16846 }
16847
16848 // TODO: These transforms should not require the 'hasOneUse' restriction, but
16849 // there are regressions on multiple targets without it. We can end up with a
16850 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16851 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16852 VecOp.hasOneUse()) {
16853 // The vector index of the LSBs of the source depend on the endian-ness.
16854 bool IsLE = DAG.getDataLayout().isLittleEndian();
16855 unsigned ExtractIndex = IndexC->getZExtValue();
16856 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16857 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16858 SDValue BCSrc = VecOp.getOperand(0);
16859 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16860 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16861
16862 if (LegalTypes && BCSrc.getValueType().isInteger() &&
16863 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16864 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16865 // trunc i64 X to i32
16866 SDValue X = BCSrc.getOperand(0);
16867 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16868 "Extract element and scalar to vector can't change element type "
16869 "from FP to integer.");
16870 unsigned XBitWidth = X.getValueSizeInBits();
16871 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16872 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16873
16874 // An extract element return value type can be wider than its vector
16875 // operand element type. In that case, the high bits are undefined, so
16876 // it's possible that we may need to extend rather than truncate.
16877 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16878 assert(XBitWidth % VecEltBitWidth == 0 &&
16879 "Scalar bitwidth must be a multiple of vector element bitwidth");
16880 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16881 }
16882 }
16883 }
16884
16885 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16886 return BO;
16887
16888 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16889 // We only perform this optimization before the op legalization phase because
16890 // we may introduce new vector instructions which are not backed by TD
16891 // patterns. For example on AVX, extracting elements from a wide vector
16892 // without using extract_subvector. However, if we can find an underlying
16893 // scalar value, then we can always use that.
16894 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16895 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16896 // Find the new index to extract from.
16897 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16898
16899 // Extracting an undef index is undef.
16900 if (OrigElt == -1)
16901 return DAG.getUNDEF(ScalarVT);
16902
16903 // Select the right vector half to extract from.
16904 SDValue SVInVec;
16905 if (OrigElt < (int)NumElts) {
16906 SVInVec = VecOp.getOperand(0);
16907 } else {
16908 SVInVec = VecOp.getOperand(1);
16909 OrigElt -= NumElts;
16910 }
16911
16912 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16913 SDValue InOp = SVInVec.getOperand(OrigElt);
16914 if (InOp.getValueType() != ScalarVT) {
16915 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16916 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16917 }
16918
16919 return InOp;
16920 }
16921
16922 // FIXME: We should handle recursing on other vector shuffles and
16923 // scalar_to_vector here as well.
16924
16925 if (!LegalOperations ||
16926 // FIXME: Should really be just isOperationLegalOrCustom.
16929 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16930 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16931 DAG.getConstant(OrigElt, DL, IndexTy));
16932 }
16933 }
16934
16935 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16936 // simplify it based on the (valid) extraction indices.
16937 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16938 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16939 Use->getOperand(0) == VecOp &&
16940 isa<ConstantSDNode>(Use->getOperand(1));
16941 })) {
16942 APInt DemandedElts = APInt::getNullValue(NumElts);
16943 for (SDNode *Use : VecOp->uses()) {
16944 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16945 if (CstElt->getAPIntValue().ult(NumElts))
16946 DemandedElts.setBit(CstElt->getZExtValue());
16947 }
16948 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16949 // We simplified the vector operand of this extract element. If this
16950 // extract is not dead, visit it again so it is folded properly.
16951 if (N->getOpcode() != ISD::DELETED_NODE)
16952 AddToWorklist(N);
16953 return SDValue(N, 0);
16954 }
16955 }
16956
16957 // Everything under here is trying to match an extract of a loaded value.
16958 // If the result of load has to be truncated, then it's not necessarily
16959 // profitable.
16960 bool BCNumEltsChanged = false;
16961 EVT ExtVT = VecVT.getVectorElementType();
16962 EVT LVT = ExtVT;
16963 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16964 return SDValue();
16965
16966 if (VecOp.getOpcode() == ISD::BITCAST) {
16967 // Don't duplicate a load with other uses.
16968 if (!VecOp.hasOneUse())
16969 return SDValue();
16970
16971 EVT BCVT = VecOp.getOperand(0).getValueType();
16972 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16973 return SDValue();
16974 if (NumElts != BCVT.getVectorNumElements())
16975 BCNumEltsChanged = true;
16976 VecOp = VecOp.getOperand(0);
16977 ExtVT = BCVT.getVectorElementType();
16978 }
16979
16980 // extract (vector load $addr), i --> load $addr + i * size
16981 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16982 ISD::isNormalLoad(VecOp.getNode()) &&
16983 !Index->hasPredecessor(VecOp.getNode())) {
16984 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16985 if (VecLoad && !VecLoad->isVolatile())
16986 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16987 }
16988
16989 // Perform only after legalization to ensure build_vector / vector_shuffle
16990 // optimizations have already been done.
16991 if (!LegalOperations || !IndexC)
16992 return SDValue();
16993
16994 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16995 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16996 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16997 int Elt = IndexC->getZExtValue();
16998 LoadSDNode *LN0 = nullptr;
16999 if (ISD::isNormalLoad(VecOp.getNode())) {
17000 LN0 = cast<LoadSDNode>(VecOp);
17001 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17002 VecOp.getOperand(0).getValueType() == ExtVT &&
17003 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17004 // Don't duplicate a load with other uses.
17005 if (!VecOp.hasOneUse())
17006 return SDValue();
17007
17008 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17009 }
17010 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17011 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17012 // =>
17013 // (load $addr+1*size)
17014
17015 // Don't duplicate a load with other uses.
17016 if (!VecOp.hasOneUse())
17017 return SDValue();
17018
17019 // If the bit convert changed the number of elements, it is unsafe
17020 // to examine the mask.
17021 if (BCNumEltsChanged)
17022 return SDValue();
17023
17024 // Select the input vector, guarding against out of range extract vector.
17025 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17026 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17027
17028 if (VecOp.getOpcode() == ISD::BITCAST) {
17029 // Don't duplicate a load with other uses.
17030 if (!VecOp.hasOneUse())
17031 return SDValue();
17032
17033 VecOp = VecOp.getOperand(0);
17034 }
17035 if (ISD::isNormalLoad(VecOp.getNode())) {
17036 LN0 = cast<LoadSDNode>(VecOp);
17037 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17038 Index = DAG.getConstant(Elt, DL, Index.getValueType());
17039 }
17040 }
17041
17042 // Make sure we found a non-volatile load and the extractelement is
17043 // the only use.
17044 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
17045 return SDValue();
17046
17047 // If Idx was -1 above, Elt is going to be -1, so just return undef.
17048 if (Elt == -1)
17049 return DAG.getUNDEF(LVT);
17050
17051 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17052}
17053
17054// Simplify (build_vec (ext )) to (bitcast (build_vec ))
17055SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17056 // We perform this optimization post type-legalization because
17057 // the type-legalizer often scalarizes integer-promoted vectors.
17058 // Performing this optimization before may create bit-casts which
17059 // will be type-legalized to complex code sequences.
17060 // We perform this optimization only before the operation legalizer because we
17061 // may introduce illegal operations.
17062 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17063 return SDValue();
17064
17065 unsigned NumInScalars = N->getNumOperands();
17066 SDLoc DL(N);
17067 EVT VT = N->getValueType(0);
17068
17069 // Check to see if this is a BUILD_VECTOR of a bunch of values
17070 // which come from any_extend or zero_extend nodes. If so, we can create
17071 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17072 // optimizations. We do not handle sign-extend because we can't fill the sign
17073 // using shuffles.
17074 EVT SourceType = MVT::Other;
17075 bool AllAnyExt = true;
17076
17077 for (unsigned i = 0; i != NumInScalars; ++i) {
17078 SDValue In = N->getOperand(i);
17079 // Ignore undef inputs.
17080 if (In.isUndef()) continue;
17081
17082 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
17083 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17084
17085 // Abort if the element is not an extension.
17086 if (!ZeroExt && !AnyExt) {
17087 SourceType = MVT::Other;
17088 break;
17089 }
17090
17091 // The input is a ZeroExt or AnyExt. Check the original type.
17092 EVT InTy = In.getOperand(0).getValueType();
17093
17094 // Check that all of the widened source types are the same.
17095 if (SourceType == MVT::Other)
17096 // First time.
17097 SourceType = InTy;
17098 else if (InTy != SourceType) {
17099 // Multiple income types. Abort.
17100 SourceType = MVT::Other;
17101 break;
17102 }
17103
17104 // Check if all of the extends are ANY_EXTENDs.
17105 AllAnyExt &= AnyExt;
17106 }
17107
17108 // In order to have valid types, all of the inputs must be extended from the
17109 // same source type and all of the inputs must be any or zero extend.
17110 // Scalar sizes must be a power of two.
17111 EVT OutScalarTy = VT.getScalarType();
17112 bool ValidTypes = SourceType != MVT::Other &&
17113 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17114 isPowerOf2_32(SourceType.getSizeInBits());
17115
17116 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17117 // turn into a single shuffle instruction.
17118 if (!ValidTypes)
17119 return SDValue();
17120
17121 bool isLE = DAG.getDataLayout().isLittleEndian();
17122 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17123 assert(ElemRatio > 1 && "Invalid element size ratio");
17124 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17125 DAG.getConstant(0, DL, SourceType);
17126
17127 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17128 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17129
17130 // Populate the new build_vector
17131 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17132 SDValue Cast = N->getOperand(i);
17133 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17134 Cast.getOpcode() == ISD::ZERO_EXTEND ||
17135 Cast.isUndef()) && "Invalid cast opcode");
17136 SDValue In;
17137 if (Cast.isUndef())
17138 In = DAG.getUNDEF(SourceType);
17139 else
17140 In = Cast->getOperand(0);
17141 unsigned Index = isLE ? (i * ElemRatio) :
17142 (i * ElemRatio + (ElemRatio - 1));
17143
17144 assert(Index < Ops.size() && "Invalid index");
17145 Ops[Index] = In;
17146 }
17147
17148 // The type of the new BUILD_VECTOR node.
17149 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17150 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17151 "Invalid vector size");
17152 // Check if the new vector type is legal.
17153 if (!isTypeLegal(VecVT) ||
17154 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17156 return SDValue();
17157
17158 // Make the new BUILD_VECTOR.
17159 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17160
17161 // The new BUILD_VECTOR node has the potential to be further optimized.
17162 AddToWorklist(BV.getNode());
17163 // Bitcast to the desired type.
17164 return DAG.getBitcast(VT, BV);
17165}
17166
17167SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17168 ArrayRef<int> VectorMask,
17169 SDValue VecIn1, SDValue VecIn2,
17170 unsigned LeftIdx, bool DidSplitVec) {
17171 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17172 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
17173
17174 EVT VT = N->getValueType(0);
17175 EVT InVT1 = VecIn1.getValueType();
17176 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17177
17178 unsigned NumElems = VT.getVectorNumElements();
17179 unsigned ShuffleNumElems = NumElems;
17180
17181 // If we artificially split a vector in two already, then the offsets in the
17182 // operands will all be based off of VecIn1, even those in VecIn2.
17183 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17184
17185 // We can't generate a shuffle node with mismatched input and output types.
17186 // Try to make the types match the type of the output.
17187 if (InVT1 != VT || InVT2 != VT) {
17188 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17189 // If the output vector length is a multiple of both input lengths,
17190 // we can concatenate them and pad the rest with undefs.
17191 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17192 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17193 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17194 ConcatOps[0] = VecIn1;
17195 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17196 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17197 VecIn2 = SDValue();
17198 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17199 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17200 return SDValue();
17201
17202 if (!VecIn2.getNode()) {
17203 // If we only have one input vector, and it's twice the size of the
17204 // output, split it in two.
17205 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17206 DAG.getConstant(NumElems, DL, IdxTy));
17207 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17208 // Since we now have shorter input vectors, adjust the offset of the
17209 // second vector's start.
17210 Vec2Offset = NumElems;
17211 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17212 // VecIn1 is wider than the output, and we have another, possibly
17213 // smaller input. Pad the smaller input with undefs, shuffle at the
17214 // input vector width, and extract the output.
17215 // The shuffle type is different than VT, so check legality again.
17216 if (LegalOperations &&
17218 return SDValue();
17219
17220 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17221 // lower it back into a BUILD_VECTOR. So if the inserted type is
17222 // illegal, don't even try.
17223 if (InVT1 != InVT2) {
17224 if (!TLI.isTypeLegal(InVT2))
17225 return SDValue();
17226 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17227 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17228 }
17229 ShuffleNumElems = NumElems * 2;
17230 } else {
17231 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17232 // than VecIn1. We can't handle this for now - this case will disappear
17233 // when we start sorting the vectors by type.
17234 return SDValue();
17235 }
17236 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17237 InVT1.getSizeInBits() == VT.getSizeInBits()) {
17238 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17239 ConcatOps[0] = VecIn2;
17240 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17241 } else {
17242 // TODO: Support cases where the length mismatch isn't exactly by a
17243 // factor of 2.
17244 // TODO: Move this check upwards, so that if we have bad type
17245 // mismatches, we don't create any DAG nodes.
17246 return SDValue();
17247 }
17248 }
17249
17250 // Initialize mask to undef.
17251 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17252
17253 // Only need to run up to the number of elements actually used, not the
17254 // total number of elements in the shuffle - if we are shuffling a wider
17255 // vector, the high lanes should be set to undef.
17256 for (unsigned i = 0; i != NumElems; ++i) {
17257 if (VectorMask[i] <= 0)
17258 continue;
17259
17260 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17261 if (VectorMask[i] == (int)LeftIdx) {
17262 Mask[i] = ExtIndex;
17263 } else if (VectorMask[i] == (int)LeftIdx + 1) {
17264 Mask[i] = Vec2Offset + ExtIndex;
17265 }
17266 }
17267
17268 // The type the input vectors may have changed above.
17269 InVT1 = VecIn1.getValueType();
17270
17271 // If we already have a VecIn2, it should have the same type as VecIn1.
17272 // If we don't, get an undef/zero vector of the appropriate type.
17273 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17274 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17275
17276 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17277 if (ShuffleNumElems > NumElems)
17278 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17279
17280 return Shuffle;
17281}
17282
17284 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17285
17286 // First, determine where the build vector is not undef.
17287 // TODO: We could extend this to handle zero elements as well as undefs.
17288 int NumBVOps = BV->getNumOperands();
17289 int ZextElt = -1;
17290 for (int i = 0; i != NumBVOps; ++i) {
17291 SDValue Op = BV->getOperand(i);
17292 if (Op.isUndef())
17293 continue;
17294 if (ZextElt == -1)
17295 ZextElt = i;
17296 else
17297 return SDValue();
17298 }
17299 // Bail out if there's no non-undef element.
17300 if (ZextElt == -1)
17301 return SDValue();
17302
17303 // The build vector contains some number of undef elements and exactly
17304 // one other element. That other element must be a zero-extended scalar
17305 // extracted from a vector at a constant index to turn this into a shuffle.
17306 // Also, require that the build vector does not implicitly truncate/extend
17307 // its elements.
17308 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17309 EVT VT = BV->getValueType(0);
17310 SDValue Zext = BV->getOperand(ZextElt);
17311 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17313 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17315 return SDValue();
17316
17317 // The zero-extend must be a multiple of the source size, and we must be
17318 // building a vector of the same size as the source of the extract element.
17319 SDValue Extract = Zext.getOperand(0);
17320 unsigned DestSize = Zext.getValueSizeInBits();
17321 unsigned SrcSize = Extract.getValueSizeInBits();
17322 if (DestSize % SrcSize != 0 ||
17323 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17324 return SDValue();
17325
17326 // Create a shuffle mask that will combine the extracted element with zeros
17327 // and undefs.
17328 int ZextRatio = DestSize / SrcSize;
17329 int NumMaskElts = NumBVOps * ZextRatio;
17330 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17331 for (int i = 0; i != NumMaskElts; ++i) {
17332 if (i / ZextRatio == ZextElt) {
17333 // The low bits of the (potentially translated) extracted element map to
17334 // the source vector. The high bits map to zero. We will use a zero vector
17335 // as the 2nd source operand of the shuffle, so use the 1st element of
17336 // that vector (mask value is number-of-elements) for the high bits.
17337 if (i % ZextRatio == 0)
17338 ShufMask[i] = Extract.getConstantOperandVal(1);
17339 else
17340 ShufMask[i] = NumMaskElts;
17341 }
17342
17343 // Undef elements of the build vector remain undef because we initialize
17344 // the shuffle mask with -1.
17345 }
17346
17347 // Turn this into a shuffle with zero if that's legal.
17348 EVT VecVT = Extract.getOperand(0).getValueType();
17349 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
17350 return SDValue();
17351
17352 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17353 // bitcast (shuffle V, ZeroVec, VectorMask)
17354 SDLoc DL(BV);
17355 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17356 SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
17357 ShufMask);
17358 return DAG.getBitcast(VT, Shuf);
17359}
17360
17361// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17362// operations. If the types of the vectors we're extracting from allow it,
17363// turn this into a vector_shuffle node.
17364SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17365 SDLoc DL(N);
17366 EVT VT = N->getValueType(0);
17367
17368 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17369 if (!isTypeLegal(VT))
17370 return SDValue();
17371
17373 return V;
17374
17375 // May only combine to shuffle after legalize if shuffle is legal.
17376 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17377 return SDValue();
17378
17379 bool UsesZeroVector = false;
17380 unsigned NumElems = N->getNumOperands();
17381
17382 // Record, for each element of the newly built vector, which input vector
17383 // that element comes from. -1 stands for undef, 0 for the zero vector,
17384 // and positive values for the input vectors.
17385 // VectorMask maps each element to its vector number, and VecIn maps vector
17386 // numbers to their initial SDValues.
17387
17388 SmallVector<int, 8> VectorMask(NumElems, -1);
17390 VecIn.push_back(SDValue());
17391
17392 for (unsigned i = 0; i != NumElems; ++i) {
17393 SDValue Op = N->getOperand(i);
17394
17395 if (Op.isUndef())
17396 continue;
17397
17398 // See if we can use a blend with a zero vector.
17399 // TODO: Should we generalize this to a blend with an arbitrary constant
17400 // vector?
17401 if (isNullConstant(Op) || isNullFPConstant(Op)) {
17402 UsesZeroVector = true;
17403 VectorMask[i] = 0;
17404 continue;
17405 }
17406
17407 // Not an undef or zero. If the input is something other than an
17408 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17409 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17410 !isa<ConstantSDNode>(Op.getOperand(1)))
17411 return SDValue();
17412 SDValue ExtractedFromVec = Op.getOperand(0);
17413
17414 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17415 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17416 return SDValue();
17417
17418 // All inputs must have the same element type as the output.
17419 if (VT.getVectorElementType() !=
17420 ExtractedFromVec.getValueType().getVectorElementType())
17421 return SDValue();
17422
17423 // Have we seen this input vector before?
17424 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17425 // a map back from SDValues to numbers isn't worth it.
17426 unsigned Idx = std::distance(
17427 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17428 if (Idx == VecIn.size())
17429 VecIn.push_back(ExtractedFromVec);
17430
17431 VectorMask[i] = Idx;
17432 }
17433
17434 // If we didn't find at least one input vector, bail out.
17435 if (VecIn.size() < 2)
17436 return SDValue();
17437
17438 // If all the Operands of BUILD_VECTOR extract from same
17439 // vector, then split the vector efficiently based on the maximum
17440 // vector access index and adjust the VectorMask and
17441 // VecIn accordingly.
17442 bool DidSplitVec = false;
17443 if (VecIn.size() == 2) {
17444 unsigned MaxIndex = 0;
17445 unsigned NearestPow2 = 0;
17446 SDValue Vec = VecIn.back();
17447 EVT InVT = Vec.getValueType();
17448 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17449 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17450
17451 for (unsigned i = 0; i < NumElems; i++) {
17452 if (VectorMask[i] <= 0)
17453 continue;
17454 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17455 IndexVec[i] = Index;
17456 MaxIndex = std::max(MaxIndex, Index);
17457 }
17458
17459 NearestPow2 = PowerOf2Ceil(MaxIndex);
17460 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17461 NumElems * 2 < NearestPow2) {
17462 unsigned SplitSize = NearestPow2 / 2;
17463 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17464 InVT.getVectorElementType(), SplitSize);
17465 if (TLI.isTypeLegal(SplitVT)) {
17466 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17467 DAG.getConstant(SplitSize, DL, IdxTy));
17468 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17469 DAG.getConstant(0, DL, IdxTy));
17470 VecIn.pop_back();
17471 VecIn.push_back(VecIn1);
17472 VecIn.push_back(VecIn2);
17473 DidSplitVec = true;
17474
17475 for (unsigned i = 0; i < NumElems; i++) {
17476 if (VectorMask[i] <= 0)
17477 continue;
17478 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17479 }
17480 }
17481 }
17482 }
17483
17484 // TODO: We want to sort the vectors by descending length, so that adjacent
17485 // pairs have similar length, and the longer vector is always first in the
17486 // pair.
17487
17488 // TODO: Should this fire if some of the input vectors has illegal type (like
17489 // it does now), or should we let legalization run its course first?
17490
17491 // Shuffle phase:
17492 // Take pairs of vectors, and shuffle them so that the result has elements
17493 // from these vectors in the correct places.
17494 // For example, given:
17495 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17496 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17497 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17498 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17499 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17500 // We will generate:
17501 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17502 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17503 SmallVector<SDValue, 4> Shuffles;
17504 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17505 unsigned LeftIdx = 2 * In + 1;
17506 SDValue VecLeft = VecIn[LeftIdx];
17507 SDValue VecRight =
17508 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17509
17510 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17511 VecRight, LeftIdx, DidSplitVec))
17512 Shuffles.push_back(Shuffle);
17513 else
17514 return SDValue();
17515 }
17516
17517 // If we need the zero vector as an "ingredient" in the blend tree, add it
17518 // to the list of shuffles.
17519 if (UsesZeroVector)
17520 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17521 : DAG.getConstantFP(0.0, DL, VT));
17522
17523 // If we only have one shuffle, we're done.
17524 if (Shuffles.size() == 1)
17525 return Shuffles[0];
17526
17527 // Update the vector mask to point to the post-shuffle vectors.
17528 for (int &Vec : VectorMask)
17529 if (Vec == 0)
17530 Vec = Shuffles.size() - 1;
17531 else
17532 Vec = (Vec - 1) / 2;
17533
17534 // More than one shuffle. Generate a binary tree of blends, e.g. if from
17535 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17536 // generate:
17537 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17538 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17539 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17540 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17541 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17542 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17543 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17544
17545 // Make sure the initial size of the shuffle list is even.
17546 if (Shuffles.size() % 2)
17547 Shuffles.push_back(DAG.getUNDEF(VT));
17548
17549 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17550 if (CurSize % 2) {
17551 Shuffles[CurSize] = DAG.getUNDEF(VT);
17552 CurSize++;
17553 }
17554 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17555 int Left = 2 * In;
17556 int Right = 2 * In + 1;
17557 SmallVector<int, 8> Mask(NumElems, -1);
17558 for (unsigned i = 0; i != NumElems; ++i) {
17559 if (VectorMask[i] == Left) {
17560 Mask[i] = i;
17561 VectorMask[i] = In;
17562 } else if (VectorMask[i] == Right) {
17563 Mask[i] = i + NumElems;
17564 VectorMask[i] = In;
17565 }
17566 }
17567
17568 Shuffles[In] =
17569 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17570 }
17571 }
17572 return Shuffles[0];
17573}
17574
17575// Try to turn a build vector of zero extends of extract vector elts into a
17576// a vector zero extend and possibly an extract subvector.
17577// TODO: Support sign extend?
17578// TODO: Allow undef elements?
17579SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17580 if (LegalOperations)
17581 return SDValue();
17582
17583 EVT VT = N->getValueType(0);
17584
17585 bool FoundZeroExtend = false;
17586 SDValue Op0 = N->getOperand(0);
17587 auto checkElem = [&](SDValue Op) -> int64_t {
17588 unsigned Opc = Op.getOpcode();
17589 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17590 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17591 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17592 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17593 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17594 return C->getZExtValue();
17595 return -1;
17596 };
17597
17598 // Make sure the first element matches
17599 // (zext (extract_vector_elt X, C))
17600 int64_t Offset = checkElem(Op0);
17601 if (Offset < 0)
17602 return SDValue();
17603
17604 unsigned NumElems = N->getNumOperands();
17605 SDValue In = Op0.getOperand(0).getOperand(0);
17606 EVT InSVT = In.getValueType().getScalarType();
17607 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17608
17609 // Don't create an illegal input type after type legalization.
17610 if (LegalTypes && !TLI.isTypeLegal(InVT))
17611 return SDValue();
17612
17613 // Ensure all the elements come from the same vector and are adjacent.
17614 for (unsigned i = 1; i != NumElems; ++i) {
17615 if ((Offset + i) != checkElem(N->getOperand(i)))
17616 return SDValue();
17617 }
17618
17619 SDLoc DL(N);
17620 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17621 Op0.getOperand(0).getOperand(1));
17622 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17623 VT, In);
17624}
17625
17626SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17627 EVT VT = N->getValueType(0);
17628
17629 // A vector built entirely of undefs is undef.
17631 return DAG.getUNDEF(VT);
17632
17633 // If this is a splat of a bitcast from another vector, change to a
17634 // concat_vector.
17635 // For example:
17636 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17637 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17638 //
17639 // If X is a build_vector itself, the concat can become a larger build_vector.
17640 // TODO: Maybe this is useful for non-splat too?
17641 if (!LegalOperations) {
17642 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17643 Splat = peekThroughBitcasts(Splat);
17644 EVT SrcVT = Splat.getValueType();
17645 if (SrcVT.isVector()) {
17646 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17647 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17648 SrcVT.getVectorElementType(), NumElts);
17649 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17650 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17652 NewVT, Ops);
17653 return DAG.getBitcast(VT, Concat);
17654 }
17655 }
17656 }
17657 }
17658
17659 // Check if we can express BUILD VECTOR via subvector extract.
17660 if (!LegalTypes && (N->getNumOperands() > 1)) {
17661 SDValue Op0 = N->getOperand(0);
17662 auto checkElem = [&](SDValue Op) -> uint64_t {
17663 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17664 (Op0.getOperand(0) == Op.getOperand(0)))
17665 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17666 return CNode->getZExtValue();
17667 return -1;
17668 };
17669
17670 int Offset = checkElem(Op0);
17671 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17672 if (Offset + i != checkElem(N->getOperand(i))) {
17673 Offset = -1;
17674 break;
17675 }
17676 }
17677
17678 if ((Offset == 0) &&
17679 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17680 return Op0.getOperand(0);
17681 if ((Offset != -1) &&
17682 ((Offset % N->getValueType(0).getVectorNumElements()) ==
17683 0)) // IDX must be multiple of output size.
17684 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17685 Op0.getOperand(0), Op0.getOperand(1));
17686 }
17687
17688 if (SDValue V = convertBuildVecZextToZext(N))
17689 return V;
17690
17691 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17692 return V;
17693
17694 if (SDValue V = reduceBuildVecToShuffle(N))
17695 return V;
17696
17697 return SDValue();
17698}
17699
17701 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17702 EVT OpVT = N->getOperand(0).getValueType();
17703
17704 // If the operands are legal vectors, leave them alone.
17705 if (TLI.isTypeLegal(OpVT))
17706 return SDValue();
17707
17708 SDLoc DL(N);
17709 EVT VT = N->getValueType(0);
17711
17712 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17713 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17714
17715 // Keep track of what we encounter.
17716 bool AnyInteger = false;
17717 bool AnyFP = false;
17718 for (const SDValue &Op : N->ops()) {
17719 if (ISD::BITCAST == Op.getOpcode() &&
17720 !Op.getOperand(0).getValueType().isVector())
17721 Ops.push_back(Op.getOperand(0));
17722 else if (ISD::UNDEF == Op.getOpcode())
17723 Ops.push_back(ScalarUndef);
17724 else
17725 return SDValue();
17726
17727 // Note whether we encounter an integer or floating point scalar.
17728 // If it's neither, bail out, it could be something weird like x86mmx.
17729 EVT LastOpVT = Ops.back().getValueType();
17730 if (LastOpVT.isFloatingPoint())
17731 AnyFP = true;
17732 else if (LastOpVT.isInteger())
17733 AnyInteger = true;
17734 else
17735 return SDValue();
17736 }
17737
17738 // If any of the operands is a floating point scalar bitcast to a vector,
17739 // use floating point types throughout, and bitcast everything.
17740 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17741 if (AnyFP) {
17743 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17744 if (AnyInteger) {
17745 for (SDValue &Op : Ops) {
17746 if (Op.getValueType() == SVT)
17747 continue;
17748 if (Op.isUndef())
17749 Op = ScalarUndef;
17750 else
17751 Op = DAG.getBitcast(SVT, Op);
17752 }
17753 }
17754 }
17755
17756 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17757 VT.getSizeInBits() / SVT.getSizeInBits());
17758 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17759}
17760
17761// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17762// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17763// most two distinct vectors the same size as the result, attempt to turn this
17764// into a legal shuffle.
17766 EVT VT = N->getValueType(0);
17767 EVT OpVT = N->getOperand(0).getValueType();
17768 int NumElts = VT.getVectorNumElements();
17769 int NumOpElts = OpVT.getVectorNumElements();
17770
17771 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17773
17774 for (SDValue Op : N->ops()) {
17775 Op = peekThroughBitcasts(Op);
17776
17777 // UNDEF nodes convert to UNDEF shuffle mask values.
17778 if (Op.isUndef()) {
17779 Mask.append((unsigned)NumOpElts, -1);
17780 continue;
17781 }
17782
17783 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17784 return SDValue();
17785
17786 // What vector are we extracting the subvector from and at what index?
17787 SDValue ExtVec = Op.getOperand(0);
17788
17789 // We want the EVT of the original extraction to correctly scale the
17790 // extraction index.
17791 EVT ExtVT = ExtVec.getValueType();
17792 ExtVec = peekThroughBitcasts(ExtVec);
17793
17794 // UNDEF nodes convert to UNDEF shuffle mask values.
17795 if (ExtVec.isUndef()) {
17796 Mask.append((unsigned)NumOpElts, -1);
17797 continue;
17798 }
17799
17800 if (!isa<ConstantSDNode>(Op.getOperand(1)))
17801 return SDValue();
17802 int ExtIdx = Op.getConstantOperandVal(1);
17803
17804 // Ensure that we are extracting a subvector from a vector the same
17805 // size as the result.
17806 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17807 return SDValue();
17808
17809 // Scale the subvector index to account for any bitcast.
17810 int NumExtElts = ExtVT.getVectorNumElements();
17811 if (0 == (NumExtElts % NumElts))
17812 ExtIdx /= (NumExtElts / NumElts);
17813 else if (0 == (NumElts % NumExtElts))
17814 ExtIdx *= (NumElts / NumExtElts);
17815 else
17816 return SDValue();
17817
17818 // At most we can reference 2 inputs in the final shuffle.
17819 if (SV0.isUndef() || SV0 == ExtVec) {
17820 SV0 = ExtVec;
17821 for (int i = 0; i != NumOpElts; ++i)
17822 Mask.push_back(i + ExtIdx);
17823 } else if (SV1.isUndef() || SV1 == ExtVec) {
17824 SV1 = ExtVec;
17825 for (int i = 0; i != NumOpElts; ++i)
17826 Mask.push_back(i + ExtIdx + NumElts);
17827 } else {
17828 return SDValue();
17829 }
17830 }
17831
17832 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
17833 return SDValue();
17834
17835 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17836 DAG.getBitcast(VT, SV1), Mask);
17837}
17838
17839SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17840 // If we only have one input vector, we don't need to do any concatenation.
17841 if (N->getNumOperands() == 1)
17842 return N->getOperand(0);
17843
17844 // Check if all of the operands are undefs.
17845 EVT VT = N->getValueType(0);
17847 return DAG.getUNDEF(VT);
17848
17849 // Optimize concat_vectors where all but the first of the vectors are undef.
17850 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17851 return Op.isUndef();
17852 })) {
17853 SDValue In = N->getOperand(0);
17854 assert(In.getValueType().isVector() && "Must concat vectors");
17855
17857
17858 // concat_vectors(scalar_to_vector(scalar), undef) ->
17859 // scalar_to_vector(scalar)
17860 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17861 Scalar.hasOneUse()) {
17862 EVT SVT = Scalar.getValueType().getVectorElementType();
17863 if (SVT == Scalar.getOperand(0).getValueType())
17864 Scalar = Scalar.getOperand(0);
17865 }
17866
17867 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17868 if (!Scalar.getValueType().isVector()) {
17869 // If the bitcast type isn't legal, it might be a trunc of a legal type;
17870 // look through the trunc so we can still do the transform:
17871 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17872 if (Scalar->getOpcode() == ISD::TRUNCATE &&
17873 !TLI.isTypeLegal(Scalar.getValueType()) &&
17874 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17875 Scalar = Scalar->getOperand(0);
17876
17877 EVT SclTy = Scalar.getValueType();
17878
17879 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17880 return SDValue();
17881
17882 // Bail out if the vector size is not a multiple of the scalar size.
17883 if (VT.getSizeInBits() % SclTy.getSizeInBits())
17884 return SDValue();
17885
17886 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17887 if (VNTNumElms < 2)
17888 return SDValue();
17889
17890 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17891 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17892 return SDValue();
17893
17894 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17895 return DAG.getBitcast(VT, Res);
17896 }
17897 }
17898
17899 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17900 // We have already tested above for an UNDEF only concatenation.
17901 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17902 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17903 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17904 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17905 };
17906 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17908 EVT SVT = VT.getScalarType();
17909
17910 EVT MinVT = SVT;
17911 if (!SVT.isFloatingPoint()) {
17912 // If BUILD_VECTOR are from built from integer, they may have different
17913 // operand types. Get the smallest type and truncate all operands to it.
17914 bool FoundMinVT = false;
17915 for (const SDValue &Op : N->ops())
17916 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17917 EVT OpSVT = Op.getOperand(0).getValueType();
17918 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17919 FoundMinVT = true;
17920 }
17921 assert(FoundMinVT && "Concat vector type mismatch");
17922 }
17923
17924 for (const SDValue &Op : N->ops()) {
17925 EVT OpVT = Op.getValueType();
17926 unsigned NumElts = OpVT.getVectorNumElements();
17927
17928 if (ISD::UNDEF == Op.getOpcode())
17929 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17930
17931 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17932 if (SVT.isFloatingPoint()) {
17933 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17934 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17935 } else {
17936 for (unsigned i = 0; i != NumElts; ++i)
17937 Opnds.push_back(
17938 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17939 }
17940 }
17941 }
17942
17943 assert(VT.getVectorNumElements() == Opnds.size() &&
17944 "Concat vector type mismatch");
17945 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17946 }
17947
17948 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17950 return V;
17951
17952 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17953 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17955 return V;
17956
17957 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17958 // nodes often generate nop CONCAT_VECTOR nodes.
17959 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17960 // place the incoming vectors at the exact same location.
17961 SDValue SingleSource = SDValue();
17962 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17963
17964 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17965 SDValue Op = N->getOperand(i);
17966
17967 if (Op.isUndef())
17968 continue;
17969
17970 // Check if this is the identity extract:
17971 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17972 return SDValue();
17973
17974 // Find the single incoming vector for the extract_subvector.
17975 if (SingleSource.getNode()) {
17976 if (Op.getOperand(0) != SingleSource)
17977 return SDValue();
17978 } else {
17979 SingleSource = Op.getOperand(0);
17980
17981 // Check the source type is the same as the type of the result.
17982 // If not, this concat may extend the vector, so we can not
17983 // optimize it away.
17984 if (SingleSource.getValueType() != N->getValueType(0))
17985 return SDValue();
17986 }
17987
17988 auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17989 // The extract index must be constant.
17990 if (!CS)
17991 return SDValue();
17992
17993 // Check that we are reading from the identity index.
17994 unsigned IdentityIndex = i * PartNumElem;
17995 if (CS->getAPIntValue() != IdentityIndex)
17996 return SDValue();
17997 }
17998
17999 if (SingleSource.getNode())
18000 return SingleSource;
18001
18002 return SDValue();
18003}
18004
18006 SelectionDAG &DAG) {
18007 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18008 SDValue BinOp = Extract->getOperand(0);
18009 unsigned BinOpcode = BinOp.getOpcode();
18010 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18011 return SDValue();
18012
18013 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18014 SDValue Index = Extract->getOperand(1);
18015 EVT VT = Extract->getValueType(0);
18016
18017 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
18018 // if the source subvector is the same type as the one being extracted.
18019 auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
18020 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18021 V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
18022 return V.getOperand(1);
18023 }
18024 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18025 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18026 V.getOperand(0).getValueType() == VT &&
18027 (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
18028 uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
18029 return V.getOperand(SubIdx);
18030 }
18031 return SDValue();
18032 };
18033 SDValue Sub0 = GetSubVector(Bop0);
18034 SDValue Sub1 = GetSubVector(Bop1);
18035
18036 // TODO: We could handle the case where only 1 operand is being inserted by
18037 // creating an extract of the other operand, but that requires checking
18038 // number of uses and/or costs.
18039 if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
18040 return SDValue();
18041
18042 // We are inserting both operands of the wide binop only to extract back
18043 // to the narrow vector size. Eliminate all of the insert/extract:
18044 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18045 return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
18046 BinOp->getFlags());
18047}
18048
18049/// If we are extracting a subvector produced by a wide binary operator try
18050/// to use a narrow binary operator and/or avoid concatenation and extraction.
18052 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
18053 // some of these bailouts with other transforms.
18054
18055 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
18056 return V;
18057
18058 // The extract index must be a constant, so we can map it to a concat operand.
18059 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18060 if (!ExtractIndexC)
18061 return SDValue();
18062
18063 // We are looking for an optionally bitcasted wide vector binary operator
18064 // feeding an extract subvector.
18065 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18066 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
18067 unsigned BOpcode = BinOp.getOpcode();
18068 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
18069 return SDValue();
18070
18071 // The binop must be a vector type, so we can extract some fraction of it.
18072 EVT WideBVT = BinOp.getValueType();
18073 if (!WideBVT.isVector())
18074 return SDValue();
18075
18076 EVT VT = Extract->getValueType(0);
18077 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
18078 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
18079 "Extract index is not a multiple of the vector length.");
18080
18081 // Bail out if this is not a proper multiple width extraction.
18082 unsigned WideWidth = WideBVT.getSizeInBits();
18083 unsigned NarrowWidth = VT.getSizeInBits();
18084 if (WideWidth % NarrowWidth != 0)
18085 return SDValue();
18086
18087 // Bail out if we are extracting a fraction of a single operation. This can
18088 // occur because we potentially looked through a bitcast of the binop.
18089 unsigned NarrowingRatio = WideWidth / NarrowWidth;
18090 unsigned WideNumElts = WideBVT.getVectorNumElements();
18091 if (WideNumElts % NarrowingRatio != 0)
18092 return SDValue();
18093
18094 // Bail out if the target does not support a narrower version of the binop.
18095 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
18096 WideNumElts / NarrowingRatio);
18097 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
18098 return SDValue();
18099
18100 // If extraction is cheap, we don't need to look at the binop operands
18101 // for concat ops. The narrow binop alone makes this transform profitable.
18102 // We can't just reuse the original extract index operand because we may have
18103 // bitcasted.
18104 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
18105 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
18106 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
18107 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
18108 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
18109 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
18110 SDLoc DL(Extract);
18111 SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18112 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18113 BinOp.getOperand(0), NewExtIndex);
18114 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18115 BinOp.getOperand(1), NewExtIndex);
18116 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
18117 BinOp.getNode()->getFlags());
18118 return DAG.getBitcast(VT, NarrowBinOp);
18119 }
18120
18121 // Only handle the case where we are doubling and then halving. A larger ratio
18122 // may require more than two narrow binops to replace the wide binop.
18123 if (NarrowingRatio != 2)
18124 return SDValue();
18125
18126 // TODO: The motivating case for this transform is an x86 AVX1 target. That
18127 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
18128 // flavors, but no other 256-bit integer support. This could be extended to
18129 // handle any binop, but that may require fixing/adding other folds to avoid
18130 // codegen regressions.
18131 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
18132 return SDValue();
18133
18134 // We need at least one concatenation operation of a binop operand to make
18135 // this transform worthwhile. The concat must double the input vector sizes.
18136 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18137 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18138 return V.getOperand(ConcatOpNum);
18139 return SDValue();
18140 };
18141 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18142 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18143
18144 if (SubVecL || SubVecR) {
18145 // If a binop operand was not the result of a concat, we must extract a
18146 // half-sized operand for our new narrow binop:
18147 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18148 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18149 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18150 SDLoc DL(Extract);
18151 SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18152 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18153 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18154 BinOp.getOperand(0), IndexC);
18155
18156 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18157 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18158 BinOp.getOperand(1), IndexC);
18159
18160 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18161 return DAG.getBitcast(VT, NarrowBinOp);
18162 }
18163
18164 return SDValue();
18165}
18166
18167/// If we are extracting a subvector from a wide vector load, convert to a
18168/// narrow load to eliminate the extraction:
18169/// (extract_subvector (load wide vector)) --> (load narrow vector)
18171 // TODO: Add support for big-endian. The offset calculation must be adjusted.
18172 if (DAG.getDataLayout().isBigEndian())
18173 return SDValue();
18174
18175 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18176 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18177 if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
18178 return SDValue();
18179
18180 // Allow targets to opt-out.
18181 EVT VT = Extract->getValueType(0);
18182 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18183 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18184 return SDValue();
18185
18186 // The narrow load will be offset from the base address of the old load if
18187 // we are extracting from something besides index 0 (little-endian).
18188 SDLoc DL(Extract);
18189 SDValue BaseAddr = Ld->getOperand(1);
18190 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18191
18192 // TODO: Use "BaseIndexOffset" to make this more effective.
18193 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18196 VT.getStoreSize());
18197 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18198 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18199 return NewLd;
18200}
18201
18202SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18203 EVT NVT = N->getValueType(0);
18204 SDValue V = N->getOperand(0);
18205
18206 // Extract from UNDEF is UNDEF.
18207 if (V.isUndef())
18208 return DAG.getUNDEF(NVT);
18209
18211 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18212 return NarrowLoad;
18213
18214 // Combine an extract of an extract into a single extract_subvector.
18215 // ext (ext X, C), 0 --> ext X, C
18216 SDValue Index = N->getOperand(1);
18217 if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18218 V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18220 V.getConstantOperandVal(1)) &&
18222 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18223 V.getOperand(1));
18224 }
18225 }
18226
18227 // Try to move vector bitcast after extract_subv by scaling extraction index:
18228 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18229 if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18230 V.getOperand(0).getValueType().isVector()) {
18231 SDValue SrcOp = V.getOperand(0);
18232 EVT SrcVT = SrcOp.getValueType();
18233 unsigned SrcNumElts = SrcVT.getVectorNumElements();
18234 unsigned DestNumElts = V.getValueType().getVectorNumElements();
18235 if ((SrcNumElts % DestNumElts) == 0) {
18236 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18237 unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18238 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18239 NewExtNumElts);
18241 unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18242 SDLoc DL(N);
18243 SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
18244 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18245 V.getOperand(0), NewIndex);
18246 return DAG.getBitcast(NVT, NewExtract);
18247 }
18248 }
18249 // TODO - handle (DestNumElts % SrcNumElts) == 0
18250 }
18251
18252 // Combine:
18253 // (extract_subvec (concat V1, V2, ...), i)
18254 // Into:
18255 // Vi if possible
18256 // Only operand 0 is checked as 'concat' assumes all inputs of the same
18257 // type.
18258 if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
18259 V.getOperand(0).getValueType() == NVT) {
18260 unsigned Idx = N->getConstantOperandVal(1);
18261 unsigned NumElems = NVT.getVectorNumElements();
18262 assert((Idx % NumElems) == 0 &&
18263 "IDX in concat is not a multiple of the result vector length.");
18264 return V->getOperand(Idx / NumElems);
18265 }
18266
18267 V = peekThroughBitcasts(V);
18268
18269 // If the input is a build vector. Try to make a smaller build vector.
18270 if (V.getOpcode() == ISD::BUILD_VECTOR) {
18271 if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18272 EVT InVT = V.getValueType();
18273 unsigned ExtractSize = NVT.getSizeInBits();
18274 unsigned EltSize = InVT.getScalarSizeInBits();
18275 // Only do this if we won't split any elements.
18276 if (ExtractSize % EltSize == 0) {
18277 unsigned NumElems = ExtractSize / EltSize;
18278 EVT EltVT = InVT.getVectorElementType();
18279 EVT ExtractVT = NumElems == 1 ? EltVT
18281 EltVT, NumElems);
18282 if ((Level < AfterLegalizeDAG ||
18283 (NumElems == 1 ||
18284 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18285 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18286 unsigned IdxVal = IdxC->getZExtValue();
18287 IdxVal *= NVT.getScalarSizeInBits();
18288 IdxVal /= EltSize;
18289
18290 if (NumElems == 1) {
18291 SDValue Src = V->getOperand(IdxVal);
18292 if (EltVT != Src.getValueType())
18293 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18294 return DAG.getBitcast(NVT, Src);
18295 }
18296
18297 // Extract the pieces from the original build_vector.
18298 SDValue BuildVec = DAG.getBuildVector(
18299 ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18300 return DAG.getBitcast(NVT, BuildVec);
18301 }
18302 }
18303 }
18304 }
18305
18306 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18307 // Handle only simple case where vector being inserted and vector
18308 // being extracted are of same size.
18309 EVT SmallVT = V.getOperand(1).getValueType();
18310 if (!NVT.bitsEq(SmallVT))
18311 return SDValue();
18312
18313 // Only handle cases where both indexes are constants.
18314 auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18315 auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18316 if (InsIdx && ExtIdx) {
18317 // Combine:
18318 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18319 // Into:
18320 // indices are equal or bit offsets are equal => V1
18321 // otherwise => (extract_subvec V1, ExtIdx)
18322 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18323 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18324 return DAG.getBitcast(NVT, V.getOperand(1));
18325 return DAG.getNode(
18327 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18328 Index);
18329 }
18330 }
18331
18332 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18333 return NarrowBOp;
18334
18336 return SDValue(N, 0);
18337
18338 return SDValue();
18339}
18340
18341/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18342/// followed by concatenation. Narrow vector ops may have better performance
18343/// than wide ops, and this can unlock further narrowing of other vector ops.
18344/// Targets can invert this transform later if it is not profitable.
18346 SelectionDAG &DAG) {
18347 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18348 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18349 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18350 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18351 return SDValue();
18352
18353 // Split the wide shuffle mask into halves. Any mask element that is accessing
18354 // operand 1 is offset down to account for narrowing of the vectors.
18355 ArrayRef<int> Mask = Shuf->getMask();
18356 EVT VT = Shuf->getValueType(0);
18357 unsigned NumElts = VT.getVectorNumElements();
18358 unsigned HalfNumElts = NumElts / 2;
18359 SmallVector<int, 16> Mask0(HalfNumElts, -1);
18360 SmallVector<int, 16> Mask1(HalfNumElts, -1);
18361 for (unsigned i = 0; i != NumElts; ++i) {
18362 if (Mask[i] == -1)
18363 continue;
18364 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18365 if (i < HalfNumElts)
18366 Mask0[i] = M;
18367 else
18368 Mask1[i - HalfNumElts] = M;
18369 }
18370
18371 // Ask the target if this is a valid transform.
18372 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18373 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18374 HalfNumElts);
18375 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18376 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18377 return SDValue();
18378
18379 // shuffle (concat X, undef), (concat Y, undef), Mask -->
18380 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18381 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18382 SDLoc DL(Shuf);
18383 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18384 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18385 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18386}
18387
18388// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18389// or turn a shuffle of a single concat into simpler shuffle then concat.
18391 EVT VT = N->getValueType(0);
18392 unsigned NumElts = VT.getVectorNumElements();
18393
18394 SDValue N0 = N->getOperand(0);
18395 SDValue N1 = N->getOperand(1);
18396 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18397 ArrayRef<int> Mask = SVN->getMask();
18398
18400 EVT ConcatVT = N0.getOperand(0).getValueType();
18401 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
18402 unsigned NumConcats = NumElts / NumElemsPerConcat;
18403
18404 auto IsUndefMaskElt = [](int i) { return i == -1; };
18405
18406 // Special case: shuffle(concat(A,B)) can be more efficiently represented
18407 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
18408 // half vector elements.
18409 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
18410 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
18411 IsUndefMaskElt)) {
18412 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
18413 N0.getOperand(1),
18414 Mask.slice(0, NumElemsPerConcat));
18415 N1 = DAG.getUNDEF(ConcatVT);
18416 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
18417 }
18418
18419 // Look at every vector that's inserted. We're looking for exact
18420 // subvector-sized copies from a concatenated vector
18421 for (unsigned I = 0; I != NumConcats; ++I) {
18422 unsigned Begin = I * NumElemsPerConcat;
18423 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
18424
18425 // Make sure we're dealing with a copy.
18426 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
18427 Ops.push_back(DAG.getUNDEF(ConcatVT));
18428 continue;
18429 }
18430
18431 int OpIdx = -1;
18432 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
18433 if (IsUndefMaskElt(SubMask[i]))
18434 continue;
18435 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
18436 return SDValue();
18437 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
18438 if (0 <= OpIdx && EltOpIdx != OpIdx)
18439 return SDValue();
18440 OpIdx = EltOpIdx;
18441 }
18442 assert(0 <= OpIdx && "Unknown concat_vectors op");
18443
18444 if (OpIdx < (int)N0.getNumOperands())
18445 Ops.push_back(N0.getOperand(OpIdx));
18446 else
18447 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
18448 }
18449
18450 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18451}
18452
18453// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18454// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18455//
18456// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18457// a simplification in some sense, but it isn't appropriate in general: some
18458// BUILD_VECTORs are substantially cheaper than others. The general case
18459// of a BUILD_VECTOR requires inserting each element individually (or
18460// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18461// all constants is a single constant pool load. A BUILD_VECTOR where each
18462// element is identical is a splat. A BUILD_VECTOR where most of the operands
18463// are undef lowers to a small number of element insertions.
18464//
18465// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18466// We don't fold shuffles where one side is a non-zero constant, and we don't
18467// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18468// non-constant operands. This seems to work out reasonably well in practice.
18470 SelectionDAG &DAG,
18471 const TargetLowering &TLI) {
18472 EVT VT = SVN->getValueType(0);
18473 unsigned NumElts = VT.getVectorNumElements();
18474 SDValue N0 = SVN->getOperand(0);
18475 SDValue N1 = SVN->getOperand(1);
18476
18477 if (!N0->hasOneUse())
18478 return SDValue();
18479
18480 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
18481 // discussed above.
18482 if (!N1.isUndef()) {
18483 if (!N1->hasOneUse())
18484 return SDValue();
18485
18486 bool N0AnyConst = isAnyConstantBuildVector(N0);
18487 bool N1AnyConst = isAnyConstantBuildVector(N1);
18488 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
18489 return SDValue();
18490 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
18491 return SDValue();
18492 }
18493
18494 // If both inputs are splats of the same value then we can safely merge this
18495 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
18496 bool IsSplat = false;
18497 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
18498 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
18499 if (BV0 && BV1)
18500 if (SDValue Splat0 = BV0->getSplatValue())
18501 IsSplat = (Splat0 == BV1->getSplatValue());
18502
18504 SmallSet<SDValue, 16> DuplicateOps;
18505 for (int M : SVN->getMask()) {
18506 SDValue Op = DAG.getUNDEF(VT.getScalarType());
18507 if (M >= 0) {
18508 int Idx = M < (int)NumElts ? M : M - NumElts;
18509 SDValue &S = (M < (int)NumElts ? N0 : N1);
18510 if (S.getOpcode() == ISD::BUILD_VECTOR) {
18511 Op = S.getOperand(Idx);
18512 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18513 SDValue Op0 = S.getOperand(0);
18514 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
18515 } else {
18516 // Operand can't be combined - bail out.
18517 return SDValue();
18518 }
18519 }
18520
18521 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18522 // generating a splat; semantically, this is fine, but it's likely to
18523 // generate low-quality code if the target can't reconstruct an appropriate
18524 // shuffle.
18525 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18526 if (!IsSplat && !DuplicateOps.insert(Op).second)
18527 return SDValue();
18528
18529 Ops.push_back(Op);
18530 }
18531
18532 // BUILD_VECTOR requires all inputs to be of the same type, find the
18533 // maximum type and extend them all.
18534 EVT SVT = VT.getScalarType();
18535 if (SVT.isInteger())
18536 for (SDValue &Op : Ops)
18537 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18538 if (SVT != VT.getScalarType())
18539 for (SDValue &Op : Ops)
18540 Op = TLI.isZExtFree(Op.getValueType(), SVT)
18541 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18542 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18543 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18544}
18545
18546// Match shuffles that can be converted to any_vector_extend_in_reg.
18547// This is often generated during legalization.
18548// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18549// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18551 SelectionDAG &DAG,
18552 const TargetLowering &TLI,
18553 bool LegalOperations) {
18554 EVT VT = SVN->getValueType(0);
18555 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18556
18557 // TODO Add support for big-endian when we have a test case.
18558 if (!VT.isInteger() || IsBigEndian)
18559 return SDValue();
18560
18561 unsigned NumElts = VT.getVectorNumElements();
18562 unsigned EltSizeInBits = VT.getScalarSizeInBits();
18563 ArrayRef<int> Mask = SVN->getMask();
18564 SDValue N0 = SVN->getOperand(0);
18565
18566 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18567 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18568 for (unsigned i = 0; i != NumElts; ++i) {
18569 if (Mask[i] < 0)
18570 continue;
18571 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18572 continue;
18573 return false;
18574 }
18575 return true;
18576 };
18577
18578 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
18579 // power-of-2 extensions as they are the most likely.
18580 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
18581 // Check for non power of 2 vector sizes
18582 if (NumElts % Scale != 0)
18583 continue;
18584 if (!isAnyExtend(Scale))
18585 continue;
18586
18587 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18588 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18589 // Never create an illegal type. Only create unsupported operations if we
18590 // are pre-legalization.
18591 if (TLI.isTypeLegal(OutVT))
18592 if (!LegalOperations ||
18594 return DAG.getBitcast(VT,
18596 SDLoc(SVN), OutVT, N0));
18597 }
18598
18599 return SDValue();
18600}
18601
18602// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18603// each source element of a large type into the lowest elements of a smaller
18604// destination type. This is often generated during legalization.
18605// If the source node itself was a '*_extend_vector_inreg' node then we should
18606// then be able to remove it.
18608 SelectionDAG &DAG) {
18609 EVT VT = SVN->getValueType(0);
18610 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18611
18612 // TODO Add support for big-endian when we have a test case.
18613 if (!VT.isInteger() || IsBigEndian)
18614 return SDValue();
18615
18617
18618 unsigned Opcode = N0.getOpcode();
18619 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18622 return SDValue();
18623
18624 SDValue N00 = N0.getOperand(0);
18625 ArrayRef<int> Mask = SVN->getMask();
18626 unsigned NumElts = VT.getVectorNumElements();
18627 unsigned EltSizeInBits = VT.getScalarSizeInBits();
18628 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18629 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18630
18631 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18632 return SDValue();
18633 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18634
18635 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
18636 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18637 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18638 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18639 for (unsigned i = 0; i != NumElts; ++i) {
18640 if (Mask[i] < 0)
18641 continue;
18642 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18643 continue;
18644 return false;
18645 }
18646 return true;
18647 };
18648
18649 // At the moment we just handle the case where we've truncated back to the
18650 // same size as before the extension.
18651 // TODO: handle more extension/truncation cases as cases arise.
18652 if (EltSizeInBits != ExtSrcSizeInBits)
18653 return SDValue();
18654
18655 // We can remove *extend_vector_inreg only if the truncation happens at
18656 // the same scale as the extension.
18657 if (isTruncate(ExtScale))
18658 return DAG.getBitcast(VT, N00);
18659
18660 return SDValue();
18661}
18662
18663// Combine shuffles of splat-shuffles of the form:
18664// shuffle (shuffle V, undef, splat-mask), undef, M
18665// If splat-mask contains undef elements, we need to be careful about
18666// introducing undef's in the folded mask which are not the result of composing
18667// the masks of the shuffles.
18669 SelectionDAG &DAG) {
18670 if (!Shuf->getOperand(1).isUndef())
18671 return SDValue();
18672 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18673 if (!Splat || !Splat->isSplat())
18674 return SDValue();
18675
18676 ArrayRef<int> ShufMask = Shuf->getMask();
18677 ArrayRef<int> SplatMask = Splat->getMask();
18678 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18679
18680 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18681 // every undef mask element in the splat-shuffle has a corresponding undef
18682 // element in the user-shuffle's mask or if the composition of mask elements
18683 // would result in undef.
18684 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18685 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18686 // In this case it is not legal to simplify to the splat-shuffle because we
18687 // may be exposing the users of the shuffle an undef element at index 1
18688 // which was not there before the combine.
18689 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18690 // In this case the composition of masks yields SplatMask, so it's ok to
18691 // simplify to the splat-shuffle.
18692 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18693 // In this case the composed mask includes all undef elements of SplatMask
18694 // and in addition sets element zero to undef. It is safe to simplify to
18695 // the splat-shuffle.
18696 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18697 ArrayRef<int> SplatMask) {
18698 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18699 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18700 SplatMask[UserMask[i]] != -1)
18701 return false;
18702 return true;
18703 };
18704 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18705 return Shuf->getOperand(0);
18706
18707 // Create a new shuffle with a mask that is composed of the two shuffles'
18708 // masks.
18709 SmallVector<int, 32> NewMask;
18710 for (int Idx : ShufMask)
18711 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18712
18713 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18714 Splat->getOperand(0), Splat->getOperand(1),
18715 NewMask);
18716}
18717
18718/// If the shuffle mask is taking exactly one element from the first vector
18719/// operand and passing through all other elements from the second vector
18720/// operand, return the index of the mask element that is choosing an element
18721/// from the first operand. Otherwise, return -1.
18723 int MaskSize = Mask.size();
18724 int EltFromOp0 = -1;
18725 // TODO: This does not match if there are undef elements in the shuffle mask.
18726 // Should we ignore undefs in the shuffle mask instead? The trade-off is
18727 // removing an instruction (a shuffle), but losing the knowledge that some
18728 // vector lanes are not needed.
18729 for (int i = 0; i != MaskSize; ++i) {
18730 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18731 // We're looking for a shuffle of exactly one element from operand 0.
18732 if (EltFromOp0 != -1)
18733 return -1;
18734 EltFromOp0 = i;
18735 } else if (Mask[i] != i + MaskSize) {
18736 // Nothing from operand 1 can change lanes.
18737 return -1;
18738 }
18739 }
18740 return EltFromOp0;
18741}
18742
18743/// If a shuffle inserts exactly one element from a source vector operand into
18744/// another vector operand and we can access the specified element as a scalar,
18745/// then we can eliminate the shuffle.
18747 SelectionDAG &DAG) {
18748 // First, check if we are taking one element of a vector and shuffling that
18749 // element into another vector.
18750 ArrayRef<int> Mask = Shuf->getMask();
18751 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18752 SDValue Op0 = Shuf->getOperand(0);
18753 SDValue Op1 = Shuf->getOperand(1);
18754 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18755 if (ShufOp0Index == -1) {
18756 // Commute mask and check again.
18758 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18759 if (ShufOp0Index == -1)
18760 return SDValue();
18761 // Commute operands to match the commuted shuffle mask.
18762 std::swap(Op0, Op1);
18763 Mask = CommutedMask;
18764 }
18765
18766 // The shuffle inserts exactly one element from operand 0 into operand 1.
18767 // Now see if we can access that element as a scalar via a real insert element
18768 // instruction.
18769 // TODO: We can try harder to locate the element as a scalar. Examples: it
18770 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18771 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18772 "Shuffle mask value must be from operand 0");
18773 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18774 return SDValue();
18775
18776 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18777 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18778 return SDValue();
18779
18780 // There's an existing insertelement with constant insertion index, so we
18781 // don't need to check the legality/profitability of a replacement operation
18782 // that differs at most in the constant value. The target should be able to
18783 // lower any of those in a similar way. If not, legalization will expand this
18784 // to a scalar-to-vector plus shuffle.
18785 //
18786 // Note that the shuffle may move the scalar from the position that the insert
18787 // element used. Therefore, our new insert element occurs at the shuffle's
18788 // mask index value, not the insert's index value.
18789 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
18790 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18791 Op0.getOperand(2).getValueType());
18792 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18793 Op1, Op0.getOperand(1), NewInsIndex);
18794}
18795
18796/// If we have a unary shuffle of a shuffle, see if it can be folded away
18797/// completely. This has the potential to lose undef knowledge because the first
18798/// shuffle may not have an undef mask element where the second one does. So
18799/// only call this after doing simplifications based on demanded elements.
18801 // shuf (shuf0 X, Y, Mask0), undef, Mask
18802 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18803 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18804 return SDValue();
18805
18806 ArrayRef<int> Mask = Shuf->getMask();
18807 ArrayRef<int> Mask0 = Shuf0->getMask();
18808 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18809 // Ignore undef elements.
18810 if (Mask[i] == -1)
18811 continue;
18812 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18813
18814 // Is the element of the shuffle operand chosen by this shuffle the same as
18815 // the element chosen by the shuffle operand itself?
18816 if (Mask0[Mask[i]] != Mask0[i])
18817 return SDValue();
18818 }
18819 // Every element of this shuffle is identical to the result of the previous
18820 // shuffle, so we can replace this value.
18821 return Shuf->getOperand(0);
18822}
18823
18824SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18825 EVT VT = N->getValueType(0);
18826 unsigned NumElts = VT.getVectorNumElements();
18827
18828 SDValue N0 = N->getOperand(0);
18829 SDValue N1 = N->getOperand(1);
18830
18831 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18832
18833 // Canonicalize shuffle undef, undef -> undef
18834 if (N0.isUndef() && N1.isUndef())
18835 return DAG.getUNDEF(VT);
18836
18837 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18838
18839 // Canonicalize shuffle v, v -> v, undef
18840 if (N0 == N1) {
18841 SmallVector<int, 8> NewMask;
18842 for (unsigned i = 0; i != NumElts; ++i) {
18843 int Idx = SVN->getMaskElt(i);
18844 if (Idx >= (int)NumElts) Idx -= NumElts;
18845 NewMask.push_back(Idx);
18846 }
18847 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18848 }
18849
18850 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
18851 if (N0.isUndef())
18852 return DAG.getCommutedVectorShuffle(*SVN);
18853
18854 // Remove references to rhs if it is undef
18855 if (N1.isUndef()) {
18856 bool Changed = false;
18857 SmallVector<int, 8> NewMask;
18858 for (unsigned i = 0; i != NumElts; ++i) {
18859 int Idx = SVN->getMaskElt(i);
18860 if (Idx >= (int)NumElts) {
18861 Idx = -1;
18862 Changed = true;
18863 }
18864 NewMask.push_back(Idx);
18865 }
18866 if (Changed)
18867 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18868 }
18869
18870 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18871 return InsElt;
18872
18873 // A shuffle of a single vector that is a splatted value can always be folded.
18874 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18875 return V;
18876
18877 // If it is a splat, check if the argument vector is another splat or a
18878 // build_vector.
18879 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18880 int SplatIndex = SVN->getSplatIndex();
18881 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18882 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18883 // splat (vector_bo L, R), Index -->
18884 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18885 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18886 SDLoc DL(N);
18887 EVT EltVT = VT.getScalarType();
18888 SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18889 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18890 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18891 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18892 N0.getNode()->getFlags());
18893 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18895 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18896 }
18897
18898 // If this is a bit convert that changes the element type of the vector but
18899 // not the number of vector elements, look through it. Be careful not to
18900 // look though conversions that change things like v4f32 to v2f64.
18901 SDNode *V = N0.getNode();
18902 if (V->getOpcode() == ISD::BITCAST) {
18903 SDValue ConvInput = V->getOperand(0);
18904 if (ConvInput.getValueType().isVector() &&
18905 ConvInput.getValueType().getVectorNumElements() == NumElts)
18906 V = ConvInput.getNode();
18907 }
18908
18909 if (V->getOpcode() == ISD::BUILD_VECTOR) {
18910 assert(V->getNumOperands() == NumElts &&
18911 "BUILD_VECTOR has wrong number of operands");
18912 SDValue Base;
18913 bool AllSame = true;
18914 for (unsigned i = 0; i != NumElts; ++i) {
18915 if (!V->getOperand(i).isUndef()) {
18916 Base = V->getOperand(i);
18917 break;
18918 }
18919 }
18920 // Splat of <u, u, u, u>, return <u, u, u, u>
18921 if (!Base.getNode())
18922 return N0;
18923 for (unsigned i = 0; i != NumElts; ++i) {
18924 if (V->getOperand(i) != Base) {
18925 AllSame = false;
18926 break;
18927 }
18928 }
18929 // Splat of <x, x, x, x>, return <x, x, x, x>
18930 if (AllSame)
18931 return N0;
18932
18933 // Canonicalize any other splat as a build_vector.
18934 SDValue Splatted = V->getOperand(SplatIndex);
18935 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18936 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18937
18938 // We may have jumped through bitcasts, so the type of the
18939 // BUILD_VECTOR may not match the type of the shuffle.
18940 if (V->getValueType(0) != VT)
18941 NewBV = DAG.getBitcast(VT, NewBV);
18942 return NewBV;
18943 }
18944 }
18945
18946 // Simplify source operands based on shuffle mask.
18948 return SDValue(N, 0);
18949
18950 // This is intentionally placed after demanded elements simplification because
18951 // it could eliminate knowledge of undef elements created by this shuffle.
18952 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18953 return ShufOp;
18954
18955 // Match shuffles that can be converted to any_vector_extend_in_reg.
18956 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18957 return V;
18958
18959 // Combine "truncate_vector_in_reg" style shuffles.
18960 if (SDValue V = combineTruncationShuffle(SVN, DAG))
18961 return V;
18962
18963 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18964 Level < AfterLegalizeVectorOps &&
18965 (N1.isUndef() ||
18966 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18967 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18968 if (SDValue V = partitionShuffleOfConcats(N, DAG))
18969 return V;
18970 }
18971
18972 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18973 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18974 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18975 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18976 return Res;
18977
18978 // If this shuffle only has a single input that is a bitcasted shuffle,
18979 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18980 // back to their original types.
18981 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18982 N1.isUndef() && Level < AfterLegalizeVectorOps &&
18983 TLI.isTypeLegal(VT)) {
18984 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18985 if (Scale == 1)
18986 return SmallVector<int, 8>(Mask.begin(), Mask.end());
18987
18988 SmallVector<int, 8> NewMask;
18989 for (int M : Mask)
18990 for (int s = 0; s != Scale; ++s)
18991 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18992 return NewMask;
18993 };
18994
18996 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18997 EVT SVT = VT.getScalarType();
18998 EVT InnerVT = BC0->getValueType(0);
18999 EVT InnerSVT = InnerVT.getScalarType();
19000
19001 // Determine which shuffle works with the smaller scalar type.
19002 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
19003 EVT ScaleSVT = ScaleVT.getScalarType();
19004
19005 if (TLI.isTypeLegal(ScaleVT) &&
19006 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19007 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19008 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19009 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19010
19011 // Scale the shuffle masks to the smaller scalar type.
19012 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19013 SmallVector<int, 8> InnerMask =
19014 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19015 SmallVector<int, 8> OuterMask =
19016 ScaleShuffleMask(SVN->getMask(), OuterScale);
19017
19018 // Merge the shuffle masks.
19019 SmallVector<int, 8> NewMask;
19020 for (int M : OuterMask)
19021 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19022
19023 // Test for shuffle mask legality over both commutations.
19024 SDValue SV0 = BC0->getOperand(0);
19025 SDValue SV1 = BC0->getOperand(1);
19026 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19027 if (!LegalMask) {
19028 std::swap(SV0, SV1);
19030 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19031 }
19032
19033 if (LegalMask) {
19034 SV0 = DAG.getBitcast(ScaleVT, SV0);
19035 SV1 = DAG.getBitcast(ScaleVT, SV1);
19036 return DAG.getBitcast(
19037 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19038 }
19039 }
19040 }
19041 }
19042
19043 // Canonicalize shuffles according to rules:
19044 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19045 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19046 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19047 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19048 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19049 TLI.isTypeLegal(VT)) {
19050 // The incoming shuffle must be of the same type as the result of the
19051 // current shuffle.
19052 assert(N1->getOperand(0).getValueType() == VT &&
19053 "Shuffle types don't match");
19054
19055 SDValue SV0 = N1->getOperand(0);
19056 SDValue SV1 = N1->getOperand(1);
19057 bool HasSameOp0 = N0 == SV0;
19058 bool IsSV1Undef = SV1.isUndef();
19059 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19060 // Commute the operands of this shuffle so that next rule
19061 // will trigger.
19062 return DAG.getCommutedVectorShuffle(*SVN);
19063 }
19064
19065 // Try to fold according to rules:
19066 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19067 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19068 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19069 // Don't try to fold shuffles with illegal type.
19070 // Only fold if this shuffle is the only user of the other shuffle.
19071 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19072 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19073 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19074
19075 // Don't try to fold splats; they're likely to simplify somehow, or they
19076 // might be free.
19077 if (OtherSV->isSplat())
19078 return SDValue();
19079
19080 // The incoming shuffle must be of the same type as the result of the
19081 // current shuffle.
19082 assert(OtherSV->getOperand(0).getValueType() == VT &&
19083 "Shuffle types don't match");
19084
19085 SDValue SV0, SV1;
19087 // Compute the combined shuffle mask for a shuffle with SV0 as the first
19088 // operand, and SV1 as the second operand.
19089 for (unsigned i = 0; i != NumElts; ++i) {
19090 int Idx = SVN->getMaskElt(i);
19091 if (Idx < 0) {
19092 // Propagate Undef.
19093 Mask.push_back(Idx);
19094 continue;
19095 }
19096
19097 SDValue CurrentVec;
19098 if (Idx < (int)NumElts) {
19099 // This shuffle index refers to the inner shuffle N0. Lookup the inner
19100 // shuffle mask to identify which vector is actually referenced.
19101 Idx = OtherSV->getMaskElt(Idx);
19102 if (Idx < 0) {
19103 // Propagate Undef.
19104 Mask.push_back(Idx);
19105 continue;
19106 }
19107
19108 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19109 : OtherSV->getOperand(1);
19110 } else {
19111 // This shuffle index references an element within N1.
19112 CurrentVec = N1;
19113 }
19114
19115 // Simple case where 'CurrentVec' is UNDEF.
19116 if (CurrentVec.isUndef()) {
19117 Mask.push_back(-1);
19118 continue;
19119 }
19120
19121 // Canonicalize the shuffle index. We don't know yet if CurrentVec
19122 // will be the first or second operand of the combined shuffle.
19123 Idx = Idx % NumElts;
19124 if (!SV0.getNode() || SV0 == CurrentVec) {
19125 // Ok. CurrentVec is the left hand side.
19126 // Update the mask accordingly.
19127 SV0 = CurrentVec;
19128 Mask.push_back(Idx);
19129 continue;
19130 }
19131
19132 // Bail out if we cannot convert the shuffle pair into a single shuffle.
19133 if (SV1.getNode() && SV1 != CurrentVec)
19134 return SDValue();
19135
19136 // Ok. CurrentVec is the right hand side.
19137 // Update the mask accordingly.
19138 SV1 = CurrentVec;
19139 Mask.push_back(Idx + NumElts);
19140 }
19141
19142 // Check if all indices in Mask are Undef. In case, propagate Undef.
19143 bool isUndefMask = true;
19144 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19145 isUndefMask &= Mask[i] < 0;
19146
19147 if (isUndefMask)
19148 return DAG.getUNDEF(VT);
19149
19150 if (!SV0.getNode())
19151 SV0 = DAG.getUNDEF(VT);
19152 if (!SV1.getNode())
19153 SV1 = DAG.getUNDEF(VT);
19154
19155 // Avoid introducing shuffles with illegal mask.
19156 if (!TLI.isShuffleMaskLegal(Mask, VT)) {
19158
19159 if (!TLI.isShuffleMaskLegal(Mask, VT))
19160 return SDValue();
19161
19162 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19163 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19164 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19165 std::swap(SV0, SV1);
19166 }
19167
19168 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19169 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19170 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19171 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
19172 }
19173
19174 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19175 return V;
19176
19177 return SDValue();
19178}
19179
19180SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19181 SDValue InVal = N->getOperand(0);
19182 EVT VT = N->getValueType(0);
19183
19184 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19185 // with a VECTOR_SHUFFLE and possible truncate.
19186 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19187 SDValue InVec = InVal->getOperand(0);
19188 SDValue EltNo = InVal->getOperand(1);
19189 auto InVecT = InVec.getValueType();
19190 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19191 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19192 int Elt = C0->getZExtValue();
19193 NewMask[0] = Elt;
19194 SDValue Val;
19195 // If we have an implict truncate do truncate here as long as it's legal.
19196 // if it's not legal, this should
19197 if (VT.getScalarType() != InVal.getValueType() &&
19198 InVal.getValueType().isScalarInteger() &&
19199 isTypeLegal(VT.getScalarType())) {
19200 Val =
19201 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19202 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19203 }
19204 if (VT.getScalarType() == InVecT.getScalarType() &&
19205 VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
19206 TLI.isShuffleMaskLegal(NewMask, VT)) {
19207 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
19208 DAG.getUNDEF(InVecT), NewMask);
19209 // If the initial vector is the correct size this shuffle is a
19210 // valid result.
19211 if (VT == InVecT)
19212 return Val;
19213 // If not we must truncate the vector.
19214 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19215 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
19216 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
19217 EVT SubVT =
19218 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
19220 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
19221 ZeroIdx);
19222 return Val;
19223 }
19224 }
19225 }
19226 }
19227
19228 return SDValue();
19229}
19230
19231SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
19232 EVT VT = N->getValueType(0);
19233 SDValue N0 = N->getOperand(0);
19234 SDValue N1 = N->getOperand(1);
19235 SDValue N2 = N->getOperand(2);
19236
19237 // If inserting an UNDEF, just return the original vector.
19238 if (N1.isUndef())
19239 return N0;
19240
19241 // If this is an insert of an extracted vector into an undef vector, we can
19242 // just use the input to the extract.
19243 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19244 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19245 return N1.getOperand(0);
19246
19247 // If we are inserting a bitcast value into an undef, with the same
19248 // number of elements, just use the bitcast input of the extract.
19249 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
19250 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
19251 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19253 N1.getOperand(0).getOperand(1) == N2 &&
19255 VT.getVectorNumElements() &&
19257 VT.getSizeInBits()) {
19258 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19259 }
19260
19261 // If both N1 and N2 are bitcast values on which insert_subvector
19262 // would makes sense, pull the bitcast through.
19263 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19264 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19265 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19266 SDValue CN0 = N0.getOperand(0);
19267 SDValue CN1 = N1.getOperand(0);
19268 EVT CN0VT = CN0.getValueType();
19269 EVT CN1VT = CN1.getValueType();
19270 if (CN0VT.isVector() && CN1VT.isVector() &&
19271 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19273 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19274 CN0.getValueType(), CN0, CN1, N2);
19275 return DAG.getBitcast(VT, NewINSERT);
19276 }
19277 }
19278
19279 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19280 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19281 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19282 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19283 N0.getOperand(1).getValueType() == N1.getValueType() &&
19284 N0.getOperand(2) == N2)
19285 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19286 N1, N2);
19287
19288 // Eliminate an intermediate insert into an undef vector:
19289 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19290 // insert_subvector undef, X, N2
19291 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19292 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19293 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19294 N1.getOperand(1), N2);
19295
19296 if (!isa<ConstantSDNode>(N2))
19297 return SDValue();
19298
19299 uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19300
19301 // Push subvector bitcasts to the output, adjusting the index as we go.
19302 // insert_subvector(bitcast(v), bitcast(s), c1)
19303 // -> bitcast(insert_subvector(v, s, c2))
19304 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19305 N1.getOpcode() == ISD::BITCAST) {
19306 SDValue N0Src = peekThroughBitcasts(N0);
19307 SDValue N1Src = peekThroughBitcasts(N1);
19308 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19309 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19310 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19311 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19312 EVT NewVT;
19313 SDLoc DL(N);
19314 SDValue NewIdx;
19315 MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
19316 LLVMContext &Ctx = *DAG.getContext();
19317 unsigned NumElts = VT.getVectorNumElements();
19318 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19319 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19320 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19321 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19322 NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
19323 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
19324 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19325 if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19326 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19327 NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
19328 }
19329 }
19330 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19331 SDValue Res = DAG.getBitcast(NewVT, N0Src);
19332 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
19333 return DAG.getBitcast(VT, Res);
19334 }
19335 }
19336 }
19337
19338 // Canonicalize insert_subvector dag nodes.
19339 // Example:
19340 // (insert_subvector (insert_subvector A, Idx0), Idx1)
19341 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
19342 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
19343 N1.getValueType() == N0.getOperand(1).getValueType() &&
19344 isa<ConstantSDNode>(N0.getOperand(2))) {
19345 unsigned OtherIdx = N0.getConstantOperandVal(2);
19346 if (InsIdx < OtherIdx) {
19347 // Swap nodes.
19348 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
19349 N0.getOperand(0), N1, N2);
19350 AddToWorklist(NewOp.getNode());
19351 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
19352 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
19353 }
19354 }
19355
19356 // If the input vector is a concatenation, and the insert replaces
19357 // one of the pieces, we can optimize into a single concat_vectors.
19358 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
19359 N0.getOperand(0).getValueType() == N1.getValueType()) {
19360 unsigned Factor = N1.getValueType().getVectorNumElements();
19361
19362 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
19363 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
19364
19365 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19366 }
19367
19368 // Simplify source operands based on insertion.
19370 return SDValue(N, 0);
19371
19372 return SDValue();
19373}
19374
19375SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19376 SDValue N0 = N->getOperand(0);
19377
19378 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19379 if (N0->getOpcode() == ISD::FP16_TO_FP)
19380 return N0->getOperand(0);
19381
19382 return SDValue();
19383}
19384
19385SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19386 SDValue N0 = N->getOperand(0);
19387
19388 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19389 if (N0->getOpcode() == ISD::AND) {
19391 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19392 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19393 N0.getOperand(0));
19394 }
19395 }
19396
19397 return SDValue();
19398}
19399
19400SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19401 SDValue N0 = N->getOperand(0);
19402 EVT VT = N0.getValueType();
19403 unsigned Opcode = N->getOpcode();
19404
19405 // VECREDUCE over 1-element vector is just an extract.
19406 if (VT.getVectorNumElements() == 1) {
19407 SDLoc dl(N);
19408 SDValue Res = DAG.getNode(
19410 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
19411 if (Res.getValueType() != N->getValueType(0))
19412 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19413 return Res;
19414 }
19415
19416 // On an boolean vector an and/or reduction is the same as a umin/umax
19417 // reduction. Convert them if the latter is legal while the former isn't.
19418 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19419 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19421 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19422 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19424 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19425 }
19426
19427 return SDValue();
19428}
19429
19430/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
19431/// with the destination vector and a zero vector.
19432/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
19433/// vector_shuffle V, Zero, <0, 4, 2, 4>
19434SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
19435 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
19436
19437 EVT VT = N->getValueType(0);
19438 SDValue LHS = N->getOperand(0);
19439 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
19440 SDLoc DL(N);
19441
19442 // Make sure we're not running after operation legalization where it
19443 // may have custom lowered the vector shuffles.
19444 if (LegalOperations)
19445 return SDValue();
19446
19447 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19448 return SDValue();
19449
19450 EVT RVT = RHS.getValueType();
19451 unsigned NumElts = RHS.getNumOperands();
19452
19453 // Attempt to create a valid clear mask, splitting the mask into
19454 // sub elements and checking to see if each is
19455 // all zeros or all ones - suitable for shuffle masking.
19456 auto BuildClearMask = [&](int Split) {
19457 int NumSubElts = NumElts * Split;
19458 int NumSubBits = RVT.getScalarSizeInBits() / Split;
19459
19460 SmallVector<int, 8> Indices;
19461 for (int i = 0; i != NumSubElts; ++i) {
19462 int EltIdx = i / Split;
19463 int SubIdx = i % Split;
19464 SDValue Elt = RHS.getOperand(EltIdx);
19465 if (Elt.isUndef()) {
19466 Indices.push_back(-1);
19467 continue;
19468 }
19469
19470 APInt Bits;
19471 if (isa<ConstantSDNode>(Elt))
19472 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
19473 else if (isa<ConstantFPSDNode>(Elt))
19474 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
19475 else
19476 return SDValue();
19477
19478 // Extract the sub element from the constant bit mask.
19479 if (DAG.getDataLayout().isBigEndian()) {
19480 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
19481 } else {
19482 Bits.lshrInPlace(SubIdx * NumSubBits);
19483 }
19484
19485 if (Split > 1)
19486 Bits = Bits.trunc(NumSubBits);
19487
19488 if (Bits.isAllOnesValue())
19489 Indices.push_back(i);
19490 else if (Bits == 0)
19491 Indices.push_back(i + NumSubElts);
19492 else
19493 return SDValue();
19494 }
19495
19496 // Let's see if the target supports this vector_shuffle.
19497 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
19498 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
19499 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
19500 return SDValue();
19501
19502 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
19503 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
19504 DAG.getBitcast(ClearVT, LHS),
19505 Zero, Indices));
19506 };
19507
19508 // Determine maximum split level (byte level masking).
19509 int MaxSplit = 1;
19510 if (RVT.getScalarSizeInBits() % 8 == 0)
19511 MaxSplit = RVT.getScalarSizeInBits() / 8;
19512
19513 for (int Split = 1; Split <= MaxSplit; ++Split)
19514 if (RVT.getScalarSizeInBits() % Split == 0)
19515 if (SDValue S = BuildClearMask(Split))
19516 return S;
19517
19518 return SDValue();
19519}
19520
19521/// If a vector binop is performed on splat values, it may be profitable to
19522/// extract, scalarize, and insert/splat.
/// Returns a build_vector holding the scalar result, or an empty SDValue when
/// the operands are not matching splats or one of the target checks fails.
// NOTE(review): listing line 19523, which should carry this function's
// signature, is absent from this extract. The body reads `N` and `DAG`, and
// SimplifyVBinOp calls scalarizeBinOpOfSplats(N, DAG) - confirm upstream.
19524 SDValue N0 = N->getOperand(0);
19525 SDValue N1 = N->getOperand(1);
19526 unsigned Opcode = N->getOpcode();
19527 EVT VT = N->getValueType(0);
19528 EVT EltVT = VT.getVectorElementType();
19529 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19530
19531 // TODO: Remove/replace the extract cost check? If the elements are available
19532 // as scalars, then there may be no extract cost. Should we ask if
19533 // inserting a scalar back into a vector is cheap instead?
 // Give up unless both operands are splats of the same lane index with the
 // result's element type, the target says extracting that lane is cheap, and
 // the scalar form of the opcode is legal or custom.
19534 int Index0, Index1;
19535 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19536 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19537 if (!Src0 || !Src1 || Index0 != Index1 ||
19538 Src0.getValueType().getVectorElementType() != EltVT ||
19539 Src1.getValueType().getVectorElementType() != EltVT ||
19540 !TLI.isExtractVecEltCheap(VT, Index0) ||
19541 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19542 return SDValue();
19543
 // Extract the splatted lane from each operand and apply the operation to the
 // two scalars, carrying over the flags of the original node.
19544 SDLoc DL(N);
19545 SDValue IndexC =
19546 DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19547 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19548 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19549 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19550
19551 // If all lanes but 1 are undefined, no need to splat the scalar result.
19552 // TODO: Keep track of undefs and use that info in the general case.
19553 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19554 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19555 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19556 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19557 // build_vec ..undef, (bo X, Y), undef...
 // NOTE(review): listing line 19558, which should declare `Ops` for this
 // branch, is missing from this extract - confirm against upstream.
19559 Ops[Index0] = ScalarBO;
19560 return DAG.getBuildVector(VT, DL, Ops);
19561 }
19562
19563 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
 // NOTE(review): listing line 19564, which should declare the `Ops` used for
 // the splat result, is missing from this extract - confirm against upstream.
19565 return DAG.getBuildVector(VT, DL, Ops);
19566}
19567
19568/// Visit a binary vector operation, like ADD.
/// Tries, in order: constant folding, hoisting identical unary shuffles above
/// the binop, narrowing a binop of two subvector insertions, and scalarizing
/// a binop of splats. Returns an empty SDValue if nothing applies.
19569SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19570 assert(N->getValueType(0).isVector() &&
19571 "SimplifyVBinOp only works on vectors!");
19572
19573 SDValue LHS = N->getOperand(0);
19574 SDValue RHS = N->getOperand(1);
19575 SDValue Ops[] = {LHS, RHS};
19576 EVT VT = N->getValueType(0);
19577 unsigned Opcode = N->getOpcode();
19578
19579 // See if we can constant fold the vector operation.
 // NOTE(review): listing line 19580 is missing from this extract; it should
 // open the `if` that binds `Fold` to the result of the folding call whose
 // arguments follow - confirm against upstream.
19581 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19582 return Fold;
19583
19584 // Move unary shuffles with identical masks after a vector binop:
19585 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19586 // --> shuffle (VBinOp A, B), Undef, Mask
19587 // This does not require type legality checks because we are creating the
19588 // same types of operations that are in the original sequence. We do have to
19589 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19590 // though. This code is adapted from the identical transform in instcombine.
19591 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19592 Opcode != ISD::UREM && Opcode != ISD::SREM &&
19593 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19594 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19595 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
 // Require at least one shuffle to have a single use (or both operands to be
 // the same node).
19596 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19597 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19598 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19599 SDLoc DL(N);
19600 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19601 RHS.getOperand(0), N->getFlags());
19602 SDValue UndefV = LHS.getOperand(1);
19603 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19604 }
19605 }
19606
19607 // The following pattern is likely to emerge with vector reduction ops. Moving
19608 // the binary operation ahead of insertion may allow using a narrower vector
19609 // instruction that has better performance than the wide version of the op:
19610 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19611 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19612 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19613 LHS.getOperand(2) == RHS.getOperand(2) &&
19614 (LHS.hasOneUse() || RHS.hasOneUse())) {
19615 SDValue X = LHS.getOperand(1);
19616 SDValue Y = RHS.getOperand(1);
19617 SDValue Z = LHS.getOperand(2);
19618 EVT NarrowVT = X.getValueType();
 // Both inserted subvectors must share a type on which the target can
 // perform the operation.
19619 if (NarrowVT == Y.getValueType() &&
19620 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19621 // (binop undef, undef) may not return undef, so compute that result.
19622 SDLoc DL(N);
19623 SDValue VecC =
19624 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19625 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19626 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19627 }
19628 }
19629
 // Last resort: a binop of two splats may be done on scalars instead.
19630 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19631 return V;
19632
19633 return SDValue();
19634}
19635
/// Simplify a select whose condition N0 is a SETCC node, with N1 and N2 as
/// the true and false values, by delegating to SimplifySelectCC. A SELECT_CC
/// result is split back into a SETCC plus a SELECT; any other result is
/// returned as is. Returns an empty SDValue if nothing was simplified.
19636SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19637 SDValue N2) {
19638 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
19639
 // Hand the compare operands and condition code of N0 to SimplifySelectCC.
19640 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19641 cast<CondCodeSDNode>(N0.getOperand(2))->get());
19642
19643 // If we got a simplified select_cc node back from SimplifySelectCC, then
19644 // break it down into a new SETCC node, and a new SELECT node, and then return
19645 // the SELECT node, since we were called with a SELECT node.
19646 if (SCC.getNode()) {
19647 // Check to see if we got a select_cc back (to turn into setcc/select).
19648 // Otherwise, just return whatever node we got back, like fabs.
19649 if (SCC.getOpcode() == ISD::SELECT_CC) {
 // The flags of the original SETCC are reused for both new nodes.
19650 const SDNodeFlags Flags = N0.getNode()->getFlags();
 // NOTE(review): listing line 19651 is missing from this extract; it should
 // start the statement that defines `SETCC` from the arguments below -
 // confirm against upstream.
19652 N0.getValueType(),
19653 SCC.getOperand(0), SCC.getOperand(1),
19654 SCC.getOperand(4), Flags);
19655 AddToWorklist(SETCC.getNode());
19656 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19657 SCC.getOperand(2), SCC.getOperand(3));
19658 SelectNode->setFlags(Flags);
19659 return SelectNode;
19660 }
19661
19662 return SCC;
19663 }
19664 return SDValue();
19665}
19666
19667/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19668/// being selected between, see if we can simplify the select. Callers of this
19669/// should assume that TheSelect is deleted if this returns true. As such, they
19670/// should return the appropriate thing (e.g. the node) back to the top-level of
19671/// the DAG combiner loop to avoid it being looked at.
/// Two folds are attempted: dropping a select that guards fsqrt against
/// negative inputs, and merging a select of two loads into a single load from
/// a selected address.
19672bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19673 SDValue RHS) {
19674 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19675 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19676 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19677 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19678 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19679 SDValue Sqrt = RHS;
19680 ISD::CondCode CC;
19681 SDValue CmpLHS;
19682 const ConstantFPSDNode *Zero = nullptr;
19683
 // Pull the compare out of the select: SELECT_CC carries it inline, while
 // SELECT/VSELECT reference a separate SETCC node. CC is only read below
 // when Zero is non-null, i.e. after one of these branches assigned it.
19684 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19685 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19686 CmpLHS = TheSelect->getOperand(0);
19687 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19688 } else {
19689 // SELECT or VSELECT
19690 SDValue Cmp = TheSelect->getOperand(0);
19691 if (Cmp.getOpcode() == ISD::SETCC) {
19692 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19693 CmpLHS = Cmp.getOperand(0);
19694 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19695 }
19696 }
19697 if (Zero && Zero->isZero() &&
19698 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19699 CC == ISD::SETULT || CC == ISD::SETLT)) {
19700 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19701 CombineTo(TheSelect, Sqrt);
19702 return true;
19703 }
19704 }
19705 }
19706 // Cannot simplify select with vector condition
19707 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19708
19709 // If this is a select from two identical things, try to pull the operation
19710 // through the select.
19711 if (LHS.getOpcode() != RHS.getOpcode() ||
19712 !LHS.hasOneUse() || !RHS.hasOneUse())
19713 return false;
19714
19715 // If this is a load and the token chain is identical, replace the select
19716 // of two loads with a load through a select of the address to load from.
19717 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19718 // constants have been dropped into the constant pool.
19719 if (LHS.getOpcode() == ISD::LOAD) {
19720 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19721 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19722
19723 // Token chains must be identical.
19724 if (LHS.getOperand(0) != RHS.getOperand(0) ||
19725 // Do not let this transformation reduce the number of volatile loads.
19726 LLD->isVolatile() || RLD->isVolatile() ||
19727 // FIXME: If either is a pre/post inc/dec load,
19728 // we'd need to split out the address adjustment.
19729 LLD->isIndexed() || RLD->isIndexed() ||
19730 // If this is an EXTLOAD, the VT's must match.
19731 LLD->getMemoryVT() != RLD->getMemoryVT() ||
19732 // If this is an EXTLOAD, the kind of extension must match.
19733 (LLD->getExtensionType() != RLD->getExtensionType() &&
19734 // The only exception is if one of the extensions is anyext.
19735 LLD->getExtensionType() != ISD::EXTLOAD &&
19736 RLD->getExtensionType() != ISD::EXTLOAD) ||
19737 // FIXME: this discards src value information. This is
19738 // over-conservative. It would be beneficial to be able to remember
19739 // both potential memory locations. Since we are discarding
19740 // src value info, don't do the transformation if the memory
19741 // locations are not in the default address space.
19742 LLD->getPointerInfo().getAddrSpace() != 0 ||
19743 RLD->getPointerInfo().getAddrSpace() != 0 ||
19744 // We can't produce a CMOV of a TargetFrameIndex since we won't
19745 // generate the address generation required.
 // NOTE(review): listing lines 19746-19747 are missing from this extract;
 // presumably they hold the TargetFrameIndex checks described just above -
 // confirm against upstream.
19748 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19749 LLD->getBasePtr().getValueType()))
19750 return false;
19751
19752 // The loads must not depend on one another.
19753 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19754 return false;
19755
19756 // Check that the select condition doesn't reach either load. If so,
19757 // folding this will induce a cycle into the DAG. If not, this is safe to
19758 // xform, so create a select of the addresses.
19759
 // NOTE(review): listing lines 19760-19761 are missing from this extract;
 // they should declare the `Visited` and `Worklist` containers used below -
 // confirm against upstream.
19762
19763 // Always fail if LLD and RLD are not independent. TheSelect is a
19764 // predecessor to all Nodes in question so we need not search past it.
19765
19766 Visited.insert(TheSelect);
19767 Worklist.push_back(LLD);
19768 Worklist.push_back(RLD);
19769
19770 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19771 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19772 return false;
19773
19774 SDValue Addr;
19775 if (TheSelect->getOpcode() == ISD::SELECT) {
19776 // We cannot do this optimization if any pair of {RLD, LLD} is a
19777 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19778 // Loads, we only need to check if CondNode is a successor to one of the
19779 // loads. We can further avoid this if there's no use of their chain
19780 // value.
19781 SDNode *CondNode = TheSelect->getOperand(0).getNode();
19782 Worklist.push_back(CondNode);
19783
19784 if ((LLD->hasAnyUseOfValue(1) &&
19785 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19786 (RLD->hasAnyUseOfValue(1) &&
19787 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19788 return false;
19789
 // Select between the two base pointers using the original condition.
19790 Addr = DAG.getSelect(SDLoc(TheSelect),
19791 LLD->getBasePtr().getValueType(),
19792 TheSelect->getOperand(0), LLD->getBasePtr(),
19793 RLD->getBasePtr());
19794 } else { // Otherwise SELECT_CC
19795 // We cannot do this optimization if any pair of {RLD, LLD} is a
19796 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19797 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19798 // one of the loads. We can further avoid this if there's no use of their
19799 // chain value.
19800
19801 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19802 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19803 Worklist.push_back(CondLHS);
19804 Worklist.push_back(CondRHS);
19805
19806 if ((LLD->hasAnyUseOfValue(1) &&
19807 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19808 (RLD->hasAnyUseOfValue(1) &&
19809 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19810 return false;
19811
 // Same compare operands and condition code, now choosing an address.
19812 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19813 LLD->getBasePtr().getValueType(),
19814 TheSelect->getOperand(0),
19815 TheSelect->getOperand(1),
19816 LLD->getBasePtr(), RLD->getBasePtr(),
19817 TheSelect->getOperand(4));
19818 }
19819
19820 SDValue Load;
19821 // It is safe to replace the two loads if they have different alignments,
19822 // but the new load must be the minimum (most restrictive) alignment of the
19823 // inputs.
19824 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
 // Start from the left load's memory operand flags and drop the invariant and
 // dereferenceable flags unless the right load has them too.
19825 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19826 if (!RLD->isInvariant())
19827 MMOFlags &= ~MachineMemOperand::MOInvariant;
19828 if (!RLD->isDereferenceable())
19829 MMOFlags &= ~MachineMemOperand::MODereferenceable;
19830 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19831 // FIXME: Discards pointer and AA info.
19832 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19833 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19834 MMOFlags);
19835 } else {
19836 // FIXME: Discards pointer and AA info.
19837 Load = DAG.getExtLoad(
 // NOTE(review): listing line 19838 is missing from this extract; it should
 // hold the condition and first arm of the conditional expression that picks
 // the extension type - confirm against upstream.
19839 : LLD->getExtensionType(),
19840 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19841 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19842 }
19843
19844 // Users of the select now use the result of the load.
19845 CombineTo(TheSelect, Load);
19846
19847 // Users of the old loads now use the new load's chain. We know the
19848 // old-load value is dead now.
19849 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19850 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19851 return true;
19852 }
19853
19854 return false;
19855}
19856
19857/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19858/// bitwise 'and'.
19859SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19860 SDValue N1, SDValue N2, SDValue N3,
19861 ISD::CondCode CC) {
19862 // If this is a select where the false operand is zero and the compare is a
19863 // check of the sign bit, see if we can perform the "gzip trick":
19864 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19865 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19866 EVT XType = N0.getValueType();
19867 EVT AType = N2.getValueType();
19868 if (!isNullConstant(N3) || !XType.bitsGE(AType))
19869 return SDValue();
19870
19871 // If the comparison is testing for a positive value, we have to invert
19872 // the sign bit mask, so only do that transform if the target has a bitwise
19873 // 'and not' instruction (the invert is free).
19874 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19875 // (X > -1) ? A : 0
19876 // (X > 0) ? X : 0 <-- This is canonical signed max.
19877 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19878 return SDValue();
19879 } else if (CC == ISD::SETLT) {
19880 // (X < 0) ? A : 0
19881 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
19882 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19883 return SDValue();
19884 } else {
19885 return SDValue();
19886 }
19887
19888 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19889 // constant.
19890 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19891 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19892 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19893 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19894 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19895 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19896 AddToWorklist(Shift.getNode());
19897
19898 if (XType.bitsGT(AType)) {
19899 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19900 AddToWorklist(Shift.getNode());
19901 }
19902
19903 if (CC == ISD::SETGT)
19904 Shift = DAG.getNOT(DL, Shift, AType);
19905
19906 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19907 }
19908
19909 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19910 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19911 AddToWorklist(Shift.getNode());
19912
19913 if (XType.bitsGT(AType)) {
19914 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19915 AddToWorklist(Shift.getNode());
19916 }
19917
19918 if (CC == ISD::SETGT)
19919 Shift = DAG.getNOT(DL, Shift, AType);
19920
19921 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19922}
19923
19924/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
19925/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19926/// in it. This may be a win when the constant is not otherwise available
19927/// because it replaces two constant pool loads with one.
/// N0, N1 and CC form the comparison; N2 and N3 are the true and false
/// values. Returns the new load, or an empty SDValue if the fold is skipped.
19928SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19929 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19930 ISD::CondCode CC) {
 // NOTE(review): listing line 19931 is missing from this extract; it should
 // hold the condition guarding the early return below - confirm against
 // upstream.
19932 return SDValue();
19933
19934 // If we are before legalize types, we want the other legalization to happen
19935 // first (for example, to avoid messing with soft float).
19936 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19937 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19938 EVT VT = N2.getValueType();
19939 if (!TV || !FV || !TLI.isTypeLegal(VT))
19940 return SDValue();
19941
19942 // If a constant can be materialized without loads, this does not make sense.
 // NOTE(review): listing line 19943 is missing from this extract; it should
 // open the `if` whose remaining operands follow. `ForCodeSize` is not
 // declared in the visible code - confirm against upstream.
19944 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19945 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19946 return SDValue();
19947
19948 // If both constants have multiple uses, then we won't need to do an extra
19949 // load. The values are likely around in registers for other users.
19950 if (!TV->hasOneUse() && !FV->hasOneUse())
19951 return SDValue();
19952
 // The false value goes in slot 0 and the true value in slot 1, matching the
 // offset select further down (condition true -> offset of one element).
19953 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19954 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19955 Type *FPTy = Elts[0]->getType();
19956 const DataLayout &TD = DAG.getDataLayout();
19957
19958 // Create a ConstantArray of the two constants.
19959 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19960 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19961 TD.getPrefTypeAlignment(FPTy));
19962 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19963
19964 // Get offsets to the 0 and 1 elements of the array, so we can select between
19965 // them.
19966 SDValue Zero = DAG.getIntPtrConstant(0, DL);
19967 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19968 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
 // Materialize the comparison and use it to pick the byte offset.
19969 SDValue Cond =
19970 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19971 AddToWorklist(Cond.getNode());
19972 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19973 AddToWorklist(CstOffset.getNode());
19974 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19975 AddToWorklist(CPIdx.getNode());
 // Load the chosen element from the constant pool, chained to the entry node.
19976 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
 // NOTE(review): listing line 19977 is missing from this extract; it should
 // begin the pointer-info argument that the next line completes - confirm
 // against upstream.
19978 DAG.getMachineFunction()), Alignment);
19979}
19980
19981/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19982/// where 'cond' is the comparison specified by CC.
/// When NotExtCompare is set, the fold that would produce a plain zero
/// extension of the compare is suppressed. Returns the simplified value, or
/// an empty SDValue if no fold applies.
19983SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19984 SDValue N2, SDValue N3, ISD::CondCode CC,
19985 bool NotExtCompare) {
19986 // (x ? y : y) -> y.
19987 if (N2 == N3) return N2;
19988
19989 EVT CmpOpVT = N0.getValueType();
19990 EVT CmpResVT = getSetCCResultType(CmpOpVT);
19991 EVT VT = N2.getValueType();
19992 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19993 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19994 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19995
19996 // Determine if the condition we're dealing with is constant.
19997 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19998 AddToWorklist(SCC.getNode());
19999 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
20000 // fold select_cc true, x, y -> x
20001 // fold select_cc false, x, y -> y
20002 return !(SCCC->isNullValue()) ? N2 : N3;
20003 }
20004 }
20005
 // Try the dedicated helpers before the inline patterns below.
20006 if (SDValue V =
20007 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20008 return V;
20009
20010 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20011 return V;
20012
20013 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20014 // where y has a single bit set.
20015 // A plaintext description would be, we can turn the SELECT_CC into an AND
20016 // when the condition can be materialized as an all-ones register. Any
20017 // single bit-test can be materialized as an all-ones register with
20018 // shift-left and shift-right-arith.
20019 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
20020 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
20021 SDValue AndLHS = N0->getOperand(0);
20022 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20023 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
20024 // Shift the tested bit over the sign bit.
20025 const APInt &AndMask = ConstAndRHS->getAPIntValue();
20026 SDValue ShlAmt =
20027 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20028 getShiftAmountTy(AndLHS.getValueType()));
20029 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20030
20031 // Now arithmetic right shift it all the way over, so the result is either
20032 // all-ones, or zero.
20033 SDValue ShrAmt =
20034 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
 // NOTE(review): listing line 20035 is missing from this extract; it should
 // supply the type argument and close this call - confirm against upstream.
20036 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20037
20038 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20039 }
20040 }
20041
20042 // fold select C, 16, 0 -> shl C, 4
 // Fold: the true value is the power of two and the false value is zero.
 // Swap: the mirrored case, handled by inverting the condition code.
20043 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
20044 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
20045
20046 if ((Fold || Swap) &&
20047 TLI.getBooleanContents(CmpOpVT) ==
 // NOTE(review): listing line 20048 is missing from this extract; it should
 // name the boolean-contents value compared against - confirm against
 // upstream.
20049 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
20050
20051 if (Swap) {
20052 CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
20053 std::swap(N2C, N3C);
20054 }
20055
20056 // If the caller doesn't want us to simplify this into a zext of a compare,
20057 // don't do it.
20058 if (NotExtCompare && N2C->isOne())
20059 return SDValue();
20060
20061 SDValue Temp, SCC;
20062 // zext (setcc n0, n1)
20063 if (LegalTypes) {
20064 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20065 if (VT.bitsLT(SCC.getValueType()))
20066 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20067 else
20068 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20069 } else {
20070 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20071 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20072 }
20073
20074 AddToWorklist(SCC.getNode());
20075 AddToWorklist(Temp.getNode());
20076
 // A constant of one needs no shift: the extended compare is the result.
20077 if (N2C->isOne())
20078 return Temp;
20079
20080 // shl setcc result by log2 n2c
20081 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20082 DAG.getConstant(N2C->getAPIntValue().logBase2(),
20083 SDLoc(Temp),
 // NOTE(review): listing line 20084 is missing from this extract; it should
 // supply the shift-amount type and close both calls - confirm against
 // upstream.
20085 }
20086
20087 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20088 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20089 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20090 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20091 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20092 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20093 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20094 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20095 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
20096 SDValue ValueOnZero = N2;
20097 SDValue Count = N3;
20098 // If the condition is NE instead of E, swap the operands.
20099 if (CC == ISD::SETNE)
20100 std::swap(ValueOnZero, Count);
20101 // Check if the value on zero is a constant equal to the bits in the type.
20102 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20103 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20104 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20105 // legal, combine to just cttz.
20106 if ((Count.getOpcode() == ISD::CTTZ ||
20107 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20108 N0 == Count.getOperand(0) &&
20109 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20110 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20111 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20112 // legal, combine to just ctlz.
20113 if ((Count.getOpcode() == ISD::CTLZ ||
20114 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20115 N0 == Count.getOperand(0) &&
20116 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20117 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20118 }
20119 }
20120 }
20121
20122 return SDValue();
20123}
20124
20125/// This is a stub for TargetLowering::SimplifySetCC.
/// It forwards VT, N0, N1, Cond, foldBooleans and DL to the target lowering
/// implementation together with a combine-info object built from DAG, Level
/// and this combiner, and returns whatever that call returns.
// NOTE(review): listing line 20126, which should start this function's
// signature, is absent from this extract - confirm against upstream.
20127 ISD::CondCode Cond, const SDLoc &DL,
20128 bool foldBooleans) {
 // NOTE(review): listing line 20129 is missing from this extract; it should
 // name the type of the `DagCombineInfo` object constructed on the next line.
20130 DagCombineInfo(DAG, Level, false, this);
20131 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20132}
20133
20134/// Given an ISD::SDIV node expressing a divide by constant, return
20135/// a DAG expression to select that will generate the same value by multiplying
20136/// by a magic number.
20137/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns an empty SDValue when the function is optimized for minimum size
/// or when the target hook produces nothing.
// NOTE(review): listing line 20138, which should carry this function's
// signature, is absent from this extract; the body reads `N` - confirm
// against upstream.
20139 // when optimising for minimum size, we don't want to expand a div to a mul
20140 // and a shift.
20141 if (DAG.getMachineFunction().getFunction().hasMinSize())
20142 return SDValue();
20143
 // NOTE(review): listing line 20144 is missing from this extract; it should
 // declare the `Built` container filled by the target hook below.
20145 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
 // Queue every node the target created so the combiner revisits it.
20146 for (SDNode *N : Built)
20147 AddToWorklist(N);
20148 return S;
20149 }
20150
20151 return SDValue();
20152}
20153
20154/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20155/// DAG expression that will generate the same value by right shifting.
/// Returns an empty SDValue when the divisor is not a constant (or constant
/// splat), is zero, or when the target hook produces nothing.
// NOTE(review): listing line 20156, which should carry this function's
// signature, is absent from this extract; the body reads `N` - confirm
// against upstream.
20157 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20158 if (!C)
20159 return SDValue();
20160
20161 // Avoid division by zero.
20162 if (C->isNullValue())
20163 return SDValue();
20164
 // NOTE(review): listing line 20165 is missing from this extract; it should
 // declare the `Built` container filled by the target hook below.
20166 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
 // Queue every node the target created so the combiner revisits it.
20167 for (SDNode *N : Built)
20168 AddToWorklist(N);
20169 return S;
20170 }
20171
20172 return SDValue();
20173}
20174
20175/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20176/// expression that will generate the same value by multiplying by a magic
20177/// number.
20178/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns an empty SDValue when the function is optimized for minimum size
/// or when the target hook produces nothing.
// NOTE(review): listing line 20179, which should carry this function's
// signature, is absent from this extract; the body reads `N` - confirm
// against upstream.
20180 // when optimising for minimum size, we don't want to expand a div to a mul
20181 // and a shift.
20182 if (DAG.getMachineFunction().getFunction().hasMinSize())
20183 return SDValue();
20184
 // NOTE(review): listing line 20185 is missing from this extract; it should
 // declare the `Built` container filled by the target hook below.
20186 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
 // Queue every node the target created so the combiner revisits it.
20187 for (SDNode *N : Built)
20188 AddToWorklist(N);
20189 return S;
20190 }
20191
20192 return SDValue();
20193}
20194
20195/// Determines the LogBase2 value for a non-null input value using the
20196/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20197SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20198 EVT VT = V.getValueType();
20199 unsigned EltBits = VT.getScalarSizeInBits();
20200 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20201 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20202 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20203 return LogBase2;
20204}
20205
20206/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20207/// For the reciprocal, we need to find the zero of the function:
20208/// F(X) = A X - 1 [which has a zero at X = 1/A]
20209/// =>
20210/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20211/// does not require additional intermediate precision]
/// Returns the refined estimate of the reciprocal of Op, or an empty SDValue
/// when the level, the type or the target rules the estimate out. Flags is
/// attached to every refinement node.
20212SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
 // No estimates once the combiner level is AfterLegalizeDAG or later.
20213 if (Level >= AfterLegalizeDAG)
20214 return SDValue();
20215
20216 // TODO: Handle half and/or extended types?
20217 EVT VT = Op.getValueType();
20218 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20219 return SDValue();
20220
20221 // If estimates are explicitly disabled for this function, we're done.
 // NOTE(review): listing line 20222 is missing from this extract; it should
 // declare the `MF` passed to the target queries below - confirm against
 // upstream.
20223 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20224 if (Enabled == TLI.ReciprocalEstimate::Disabled)
20225 return SDValue();
20226
20227 // Estimates may be explicitly enabled for this type with a custom number of
20228 // refinement steps.
20229 int Iterations = TLI.getDivRefinementSteps(VT, MF);
20230 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20231 AddToWorklist(Est.getNode());
20232
20233 if (Iterations) {
20234 SDLoc DL(Op);
20235 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20236
20237 // Newton iterations: Est = Est + Est (1 - Arg * Est)
20238 for (int i = 0; i < Iterations; ++i) {
 // NewEst = Arg * Est
20239 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
20240 AddToWorklist(NewEst.getNode());
20241
 // NewEst = 1 - Arg * Est
20242 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
20243 AddToWorklist(NewEst.getNode());
20244
 // NewEst = Est * (1 - Arg * Est)
20245 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20246 AddToWorklist(NewEst.getNode());
20247
 // Est = Est + Est * (1 - Arg * Est)
20248 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
20249 AddToWorklist(Est.getNode());
20250 }
20251 }
20252 return Est;
20253 }
20254
20255 return SDValue();
20256}
20257
20258/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20259/// For the reciprocal sqrt, we need to find the zero of the function:
20260/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20261/// =>
20262/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20263/// As a result, we precompute A/2 prior to the iteration loop.
20264SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20265 unsigned Iterations,
20266 SDNodeFlags Flags, bool Reciprocal) {
20267 EVT VT = Arg.getValueType();
20268 SDLoc DL(Arg);
20269 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20270
20271 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20272 // this entire sequence requires only one FP constant.
20273 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20274 AddToWorklist(HalfArg.getNode());
20275
20276 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20277 AddToWorklist(HalfArg.getNode());
20278
20279 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20280 for (unsigned i = 0; i < Iterations; ++i) {
20281 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20282 AddToWorklist(NewEst.getNode());
20283
20284 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20285 AddToWorklist(NewEst.getNode());
20286
20287 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20288 AddToWorklist(NewEst.getNode());
20289
20290 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20291 AddToWorklist(Est.getNode());
20292 }
20293
20294 // If non-reciprocal square root is requested, multiply the result by Arg.
20295 if (!Reciprocal) {
20296 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20297 AddToWorklist(Est.getNode());
20298 }
20299
20300 return Est;
20301}
20302
/// Refine a reciprocal-square-root estimate with Newton steps that use two
/// floating-point constants (-3.0 and -0.5).
///
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
/// When \p Reciprocal is false, the final step folds the multiplication by
/// \p Arg into the left-hand factor, so at least one iteration is required.
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  const EVT Ty = Arg.getValueType();
  const SDLoc Loc(Arg);
  const SDValue MinusThree = DAG.getConstantFP(-3.0, Loc, Ty);
  const SDValue MinusHalf = DAG.getConstantFP(-0.5, Loc, Ty);

  // The square-root form is only produced inside the loop, so the loop must
  // run at least once for (Reciprocal == false) to be handled correctly.
  assert(Iterations > 0);

  // Create a flagged binary FP node and queue it for combining.
  auto Emit = [&](unsigned Opcode, SDValue LHS, SDValue RHS) {
    SDValue Node = DAG.getNode(Opcode, Loc, Ty, LHS, RHS, Flags);
    AddToWorklist(Node.getNode());
    return Node;
  };

  // Reciprocal square root step:
  //   E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned Step = 0; Step != Iterations; ++Step) {
    const SDValue AE = Emit(ISD::FMUL, Arg, Est);
    const SDValue AEE = Emit(ISD::FMUL, AE, Est);
    const SDValue RHS = Emit(ISD::FADD, AEE, MinusThree);

    // On the last step of a square-root computation build
    //   S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // which reuses the (A * E) subexpression; every other step uses E itself.
    const bool IsFinalSqrtStep = !Reciprocal && (Step + 1) >= Iterations;
    const SDValue LHS =
        Emit(ISD::FMUL, IsFinalSqrtStep ? AE : Est, MinusHalf);

    Est = Emit(ISD::FMUL, LHS, RHS);
  }

  return Est;
}
20351
20352/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
20353/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
20354/// Op can be zero.
20355SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
20356 bool Reciprocal) {
20357 if (Level >= AfterLegalizeDAG)
20358 return SDValue();
20359
20360 // TODO: Handle half and/or extended types?
20361 EVT VT = Op.getValueType();
20362 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20363 return SDValue();
20364
20365 // If estimates are explicitly disabled for this function, we're done.
20367 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
20368 if (Enabled == TLI.ReciprocalEstimate::Disabled)
20369 return SDValue();
20370
20371 // Estimates may be explicitly enabled for this type with a custom number of
20372 // refinement steps.
20373 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
20374
20375 bool UseOneConstNR = false;
20376 if (SDValue Est =
20377 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
20378 Reciprocal)) {
20379 AddToWorklist(Est.getNode());
20380
20381 if (Iterations) {
20382 Est = UseOneConstNR
20383 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
20384 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
20385
20386 if (!Reciprocal) {
20387 // The estimate is now completely wrong if the input was exactly 0.0 or
20388 // possibly a denormal. Force the answer to 0.0 for those cases.
20389 SDLoc DL(Op);
20390 EVT CCVT = getSetCCResultType(VT);
20391 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
20392 const Function &F = DAG.getMachineFunction().getFunction();
20393 Attribute Denorms = F.getFnAttribute("denormal-fp-math");
20394 if (Denorms.getValueAsString().equals("ieee")) {
20395 // fabs(X) < SmallestNormal ? 0.0 : Est
20396 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
20397 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
20398 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
20399 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20400 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
20401 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
20402 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20403 AddToWorklist(Fabs.getNode());
20404 AddToWorklist(IsDenorm.getNode());
20405 AddToWorklist(Est.getNode());
20406 } else {
20407 // X == 0.0 ? 0.0 : Est
20408 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20409 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
20410 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20411 AddToWorklist(IsZero.getNode());
20412 AddToWorklist(Est.getNode());
20413 }
20414 }
20415 }
20416 return Est;
20417 }
20418
20419 return SDValue();
20420}
20421
20422SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20423 return buildSqrtEstimateImpl(Op, Flags, true);
20424}
20425
20426SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20427 return buildSqrtEstimateImpl(Op, Flags, false);
20428}
20429
20430/// Return true if there is any possibility that the two addresses overlap.
20431bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
20432
20433 struct MemUseCharacteristics {
20434 bool IsVolatile;
20436 int64_t Offset;
20437 Optional<int64_t> NumBytes;
20438 MachineMemOperand *MMO;
20439 };
20440
20441 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
20442 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
20443 int64_t Offset = 0;
20444 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
20445 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
20446 ? C->getSExtValue()
20447 : (LSN->getAddressingMode() == ISD::PRE_DEC)
20448 ? -1 * C->getSExtValue()
20449 : 0;
20450 return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
20451 Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
20452 LSN->getMemOperand()};
20453 }
20454 if (const auto *LN = cast<LifetimeSDNode>(N))
20455 return {false /*isVolatile*/, LN->getOperand(1),
20456 (LN->hasOffset()) ? LN->getOffset() : 0,
20457 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
20459 (MachineMemOperand *)nullptr};
20460 // Default.
20461 return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
20462 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
20463 };
20464
20465 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
20466 MUC1 = getCharacteristics(Op1);
20467
20468 // If they are to the same address, then they must be aliases.
20469 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
20470 MUC0.Offset == MUC1.Offset)
20471 return true;
20472
20473 // If they are both volatile then they cannot be reordered.
20474 if (MUC0.IsVolatile && MUC1.IsVolatile)
20475 return true;
20476
20477 if (MUC0.MMO && MUC1.MMO) {
20478 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20479 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20480 return false;
20481 }
20482
20483 // Try to prove that there is aliasing, or that there is no aliasing. Either
20484 // way, we can return now. If nothing can be proved, proceed with more tests.
20485 bool IsAlias;
20486 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
20487 DAG, IsAlias))
20488 return IsAlias;
20489
20490 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
20491 // either are not known.
20492 if (!MUC0.MMO || !MUC1.MMO)
20493 return true;
20494
20495 // If one operation reads from invariant memory, and the other may store, they
20496 // cannot alias. These should really be checking the equivalent of mayWrite,
20497 // but it only matters for memory nodes other than load /store.
20498 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20499 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20500 return false;
20501
20502 // If we know required SrcValue1 and SrcValue2 have relatively large
20503 // alignment compared to the size and offset of the access, we may be able
20504 // to prove they do not alias. This check is conservative for now to catch
20505 // cases created by splitting vector types.
20506 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
20507 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
20508 unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
20509 unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
20510 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
20511 MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
20512 *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
20513 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
20514 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
20515
20516 // There is no overlap between these relatively aligned accesses of
20517 // similar size. Return no alias.
20518 if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
20519 (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
20520 return false;
20521 }
20522
20523 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20525 : DAG.getSubtarget().useAA();
20526#ifndef NDEBUG
20527 if (CombinerAAOnlyFunc.getNumOccurrences() &&
20529 UseAA = false;
20530#endif
20531
20532 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20533 // Use alias analysis information.
20534 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20535 int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20536 int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20537 AliasResult AAResult = AA->alias(
20538 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20539 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20540 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20541 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20542 if (AAResult == NoAlias)
20543 return false;
20544 }
20545
20546 // Otherwise we have to assume they alias.
20547 return true;
20548}
20549
20550/// Walk up chain skipping non-aliasing memory nodes,
20551/// looking for aliasing nodes and adding them to the Aliases vector.
20552void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20553 SmallVectorImpl<SDValue> &Aliases) {
20554 SmallVector<SDValue, 8> Chains; // List of chains to visit.
20555 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
20556
20557 // Get alias information for node.
20558 const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
20559
20560 // Starting off.
20561 Chains.push_back(OriginalChain);
20562 unsigned Depth = 0;
20563
20564 // Attempt to improve chain by a single step
20565 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20566 switch (C.getOpcode()) {
20567 case ISD::EntryToken:
20568 // No need to mark EntryToken.
20569 C = SDValue();
20570 return true;
20571 case ISD::LOAD:
20572 case ISD::STORE: {
20573 // Get alias information for C.
20574 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20575 !cast<LSBaseSDNode>(C.getNode())->isVolatile();
20576 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20577 // Look further up the chain.
20578 C = C.getOperand(0);
20579 return true;
20580 }
20581 // Alias, so stop here.
20582 return false;
20583 }
20584
20585 case ISD::CopyFromReg:
20586 // Always forward past past CopyFromReg.
20587 C = C.getOperand(0);
20588 return true;
20589
20591 case ISD::LIFETIME_END: {
20592 // We can forward past any lifetime start/end that can be proven not to
20593 // alias the memory access.
20594 if (!isAlias(N, C.getNode())) {
20595 // Look further up the chain.
20596 C = C.getOperand(0);
20597 return true;
20598 }
20599 return false;
20600 }
20601 default:
20602 return false;
20603 }
20604 };
20605
20606 // Look at each chain and determine if it is an alias. If so, add it to the
20607 // aliases list. If not, then continue up the chain looking for the next
20608 // candidate.
20609 while (!Chains.empty()) {
20610 SDValue Chain = Chains.pop_back_val();
20611
20612 // Don't bother if we've seen Chain before.
20613 if (!Visited.insert(Chain.getNode()).second)
20614 continue;
20615
20616 // For TokenFactor nodes, look at each operand and only continue up the
20617 // chain until we reach the depth limit.
20618 //
20619 // FIXME: The depth check could be made to return the last non-aliasing
20620 // chain we found before we hit a tokenfactor rather than the original
20621 // chain.
20622 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20623 Aliases.clear();
20624 Aliases.push_back(OriginalChain);
20625 return;
20626 }
20627
20628 if (Chain.getOpcode() == ISD::TokenFactor) {
20629 // We have to check each of the operands of the token factor for "small"
20630 // token factors, so we queue them up. Adding the operands to the queue
20631 // (stack) in reverse order maintains the original order and increases the
20632 // likelihood that getNode will find a matching token factor (CSE.)
20633 if (Chain.getNumOperands() > 16) {
20634 Aliases.push_back(Chain);
20635 continue;
20636 }
20637 for (unsigned n = Chain.getNumOperands(); n;)
20638 Chains.push_back(Chain.getOperand(--n));
20639 ++Depth;
20640 continue;
20641 }
20642 // Everything else
20643 if (ImproveChain(Chain)) {
20644 // Updated Chain Found, Consider new chain if one exists.
20645 if (Chain.getNode())
20646 Chains.push_back(Chain);
20647 ++Depth;
20648 continue;
20649 }
20650 // No Improved Chain Possible, treat as Alias.
20651 Aliases.push_back(Chain);
20652 }
20653}
20654
20655/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20656/// (aliasing node.)
20657SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20658 if (OptLevel == CodeGenOpt::None)
20659 return OldChain;
20660
20661 // Ops for replacing token factor.
20663
20664 // Accumulate all the aliases to this node.
20665 GatherAllAliases(N, OldChain, Aliases);
20666
20667 // If no operands then chain to entry token.
20668 if (Aliases.size() == 0)
20669 return DAG.getEntryNode();
20670
20671 // If a single operand then chain to it. We don't need to revisit it.
20672 if (Aliases.size() == 1)
20673 return Aliases[0];
20674
20675 // Construct a custom tailored token factor.
20676 return DAG.getTokenFactor(SDLoc(N), Aliases);
20677}
20678
namespace {
// TODO: Replace with std::monostate when we move to C++17.
/// Stateless placeholder value type; every instance compares equal to every
/// other. `Unit` is the single shared instance.
struct UnitT {
} Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }

bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
20685
20686// This function tries to collect a bunch of potentially interesting
20687// nodes to improve the chains of, all at once. This might seem
20688// redundant, as this function gets called when visiting every store
20689// node, so why not let the work be done on each store as it's visited?
20690//
20691// I believe this is mainly important because MergeConsecutiveStores
20692// is unable to deal with merging stores of different sizes, so unless
20693// we improve the chains of all the potential candidates up-front
20694// before running MergeConsecutiveStores, it might only see some of
20695// the nodes that will eventually be candidates, and then not be able
20696// to go from a partially-merged state to the desired final
20697// fully-merged state.
20698
20699bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
20700 SmallVector<StoreSDNode *, 8> ChainedStores;
20701 StoreSDNode *STChain = St;
20702 // Intervals records which offsets from BaseIndex have been covered. In
20703 // the common case, every store writes to the immediately previous address
20704 // space and thus merged with the previous interval at insertion time.
20705
20706 using IMap =
20708 IMap::Allocator A;
20709 IMap Intervals(A);
20710
20711 // This holds the base pointer, index, and the offset in bytes from the base
20712 // pointer.
20714
20715 // We must have a base and an offset.
20716 if (!BasePtr.getBase().getNode())
20717 return false;
20718
20719 // Do not handle stores to undef base pointers.
20720 if (BasePtr.getBase().isUndef())
20721 return false;
20722
20723 // Add ST's interval.
20724 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
20725
20726 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
20727 // If the chain has more than one use, then we can't reorder the mem ops.
20728 if (!SDValue(Chain, 0)->hasOneUse())
20729 break;
20730 if (Chain->isVolatile() || Chain->isIndexed())
20731 break;
20732
20733 // Find the base pointer and offset for this memory node.
20734 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
20735 // Check that the base pointer is the same as the original one.
20736 int64_t Offset;
20737 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
20738 break;
20739 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
20740 // Make sure we don't overlap with other intervals by checking the ones to
20741 // the left or right before inserting.
20742 auto I = Intervals.find(Offset);
20743 // If there's a next interval, we should end before it.
20744 if (I != Intervals.end() && I.start() < (Offset + Length))
20745 break;
20746 // If there's a previous interval, we should start after it.
20747 if (I != Intervals.begin() && (--I).stop() <= Offset)
20748 break;
20749 Intervals.insert(Offset, Offset + Length, Unit);
20750
20751 ChainedStores.push_back(Chain);
20752 STChain = Chain;
20753 }
20754
20755 // If we didn't find a chained store, exit.
20756 if (ChainedStores.size() == 0)
20757 return false;
20758
20759 // Improve all chained stores (St and ChainedStores members) starting from
20760 // where the store chain ended and return single TokenFactor.
20761 SDValue NewChain = STChain->getChain();
20763 for (unsigned I = ChainedStores.size(); I;) {
20764 StoreSDNode *S = ChainedStores[--I];
20765 SDValue BetterChain = FindBetterChain(S, NewChain);
20766 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
20767 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
20768 TFOps.push_back(SDValue(S, 0));
20769 ChainedStores[I] = S;
20770 }
20771
20772 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20773 SDValue BetterChain = FindBetterChain(St, NewChain);
20774 SDValue NewST;
20775 if (St->isTruncatingStore())
20776 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
20777 St->getBasePtr(), St->getMemoryVT(),
20778 St->getMemOperand());
20779 else
20780 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
20781 St->getBasePtr(), St->getMemOperand());
20782
20783 TFOps.push_back(NewST);
20784
20785 // If we improved every element of TFOps, then we've lost the dependence on
20786 // NewChain to successors of St and we need to add it back to TFOps. Do so at
20787 // the beginning to keep relative order consistent with FindBetterChains.
20788 auto hasImprovedChain = [&](SDValue ST) -> bool {
20789 return ST->getOperand(0) != NewChain;
20790 };
20791 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
20792 if (AddNewChain)
20793 TFOps.insert(TFOps.begin(), NewChain);
20794
20795 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
20796 CombineTo(St, TF);
20797
20798 AddToWorklist(STChain);
20799 // Add TF operands worklist in reverse order.
20800 for (auto I = TF->getNumOperands(); I;)
20801 AddToWorklist(TF->getOperand(--I).getNode());
20802 AddToWorklist(TF.getNode());
20803 return true;
20804}
20805
20806bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20807 if (OptLevel == CodeGenOpt::None)
20808 return false;
20809
20811
20812 // We must have a base and an offset.
20813 if (!BasePtr.getBase().getNode())
20814 return false;
20815
20816 // Do not handle stores to undef base pointers.
20817 if (BasePtr.getBase().isUndef())
20818 return false;
20819
20820 // Directly improve a chain of disjoint stores starting at St.
20821 if (parallelizeChainedStores(St))
20822 return true;
20823
20824 // Improve St's Chain..
20825 SDValue BetterChain = FindBetterChain(St, St->getChain());
20826 if (St->getChain() != BetterChain) {
20827 replaceStoreChain(St, BetterChain);
20828 return true;
20829 }
20830 return false;
20831}
20832
20833/// This is the entry point for the file.
20835 CodeGenOpt::Level OptLevel) {
20836 /// This is the main entry point to this class.
20837 DAGCombiner(*this, AA, OptLevel).Run(Level);
20838}
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static uint64_t * getMemory(unsigned numWords)
A utility function for allocating memory and checking for allocation failure.
Definition: APInt.cpp:45
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
This file contains the simple types necessary to represent the attributes associated with functions a...
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static int numVectorEltsOrZero(EVT T)
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, unsigned Depth=0)
If isNegatibleForFree returns true, return the newly negated expression.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, APFloat(*Op)(const APFloat &, const APFloat &))
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static const Optional< ByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, bool Root=false)
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG)
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, bool ForCodeSize, unsigned Depth=0)
Return 1 if we can compute the negated form of the specified expression for the same cost as the expr...
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propapagtion pattern try to break it up to generate somet...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
static unsigned BigEndianByteAt(unsigned BW, unsigned i)
static SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N)
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::pair< SDValue, SDValue > SplitVSETCC(const SDNode *N, SelectionDAG &DAG)
static SDValue stripTruncAndExt(SDValue Value)
static bool isContractable(SDNode *N)
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG)
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V)
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static Optional< bool > isBigEndian(const SmallVector< int64_t, 4 > &ByteOffsets, int64_t FirstOffset)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue flipBoolean(SDValue V, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static ManagedStatic< DebugCounter > DC
#define LLVM_DEBUG(X)
Definition: Debug.h:122
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:877
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned second
iv Induction Variable Users
Definition: IVUsers.cpp:51
static Value * simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv)
Check for common or similar folds of integer division or integer remainder.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
uint32_t Size
Definition: Profile.cpp:46
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static MachineMemOperand * getMachineMemOperand(MachineFunction &MF, FrameIndexSDNode &FI)
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:168
This file describes how to lower LLVM code to machine code.
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
static uint32_t getAlignment(const MCSectionCOFF &Sec)
static uint32_t Concat[]
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
bool isNegative() const
Definition: APFloat.h:1157
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1140
const fltSemantics & getSemantics() const
Definition: APFloat.h:1165
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:934
APInt bitcastToAPInt() const
Definition: APFloat.h:1104
void changeSign()
Definition: APFloat.h:1060
Class for arbitrary precision integers.
Definition: APInt.h:69
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1709
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:860
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:554
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:442
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:878
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1532
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:813
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1402
APInt abs() const
Get the absolute value.
Definition: APInt.h:1799
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:606
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:568
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1184
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:363
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1320
bool isOneValue() const
Determine if this is a value of 1.
Definition: APInt.h:410
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:525
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1631
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1595
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:345
unsigned logBase2() const
Definition: APInt.h:1747
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:395
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:477
bool isMask(unsigned numBits) const
Definition: APInt.h:494
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:992
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1328
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:463
APInt zextOrSelf(unsigned width) const
Zero extend to width, or return the value unchanged if it is already at least that wide.
Definition: APInt.cpp:894
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:635
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:404
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1645
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1574
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:977
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1292
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:223
Helper struct to parse and store a memory address as base + index + offset.
bool contains(const SelectionDAG &DAG, int64_t BitSize, const BaseIndexOffset &Other, int64_t OtherBitSize, int64_t &BitOffset) const
static bool computeAliasing(const SDNode *Op0, const Optional< int64_t > NumBytes0, const SDNode *Op1, const Optional< int64_t > NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ISD::CondCode get() const
const APFloat & getValueAPF() const
const ConstantFP * getConstantFPValue() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX)
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
Definition: Constants.cpp:99
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:756
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:232
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:746
bool isBigEndian() const
Definition: DataLayout.h:233
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:469
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool erase(const KeyT &Val)
Definition: DenseMap.h:298
iterator end()
Definition: DenseMap.h:108
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
This class is used to form a handle around another node that is persistent and is updated across invo...
NodeT & get() const
get - Dereference as a NodeT reference.
Definition: IntervalMap.h:526
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
SimpleValueType SimpleTy
unsigned getSizeInBits() const
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range all_valuetypes()
SimpleValueType Iteration.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getPassThru() const
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
bool isVolatile() const
unsigned getOriginalAlignment() const
Returns alignment and volatility of the memory access.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:290
bool hasValue() const
Definition: Optional.h:259
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
const SDNodeFlags getFlags() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
unsigned getScalarValueSizeInBits() const
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:470
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:976
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to contain non-zero value(s).
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:415
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N)
Test whether the given value is a constant int or similar node.
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned char TargetFlags=0)
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, bool IsTruncating=false, bool IsCompressing=false)
OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const
Determine if the result of the addition of two nodes can overflow.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:416
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:878
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:750
void DeleteNode(SDNode *N)
Remove the specified node from the system.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts)
Test whether V has a splatted value for all the demanded elements.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL)
Returns sum of the base pointer and offset.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:413
SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits)
See if the specified operand can be simplified with the knowledge that only the bits specified by Dem...
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags=SDNodeFlags())
Get the specified node if it's already available, or else return NULL.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:579
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *N1, SDNode *N2)
bool isKnownToBeAPowerOfTwo(SDValue Val) const
Test if the given value is known to have exactly one bit set.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType, bool IsExpanding=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:414
SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, const SDNodeFlags Flags=SDNodeFlags())
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:462
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:988
void Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:417
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:410
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVMContext * getContext() const
Definition: SelectionDAG.h:420
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:479
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:963
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:473
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
A vector that has set insertion semantics.
Definition: SetVector.h:40
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:227
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition: SmallPtrSet.h:92
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:377
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:417
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
size_t size() const
Definition: SmallVector.h:52
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:315
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
void resize(size_type N)
Definition: SmallVector.h:344
void push_back(const T &Elt)
Definition: SmallVector.h:211
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:144
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:837
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:160
void reassociateOps(MachineInstr &Root, MachineInstr &Prev, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const
Attempt to reassociate Root and Prev according to Pattern to reduce critical path length.
bool has(LibFunc F) const
Tests whether a library function is available.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool hasPairedLoad(EVT, unsigned &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool decomposeMulByConstant(EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool hasBitPreservingFPLogic(EVT VT) const
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
unsigned getGatherAllAliasesMaxDepth() const
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool isFMAFasterThanFMulAndFAdd(EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, unsigned Alignment=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
bool isConstFalseVal(const SDNode *N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(const SDNode *N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount though its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const SelectionDAGTargetInfo * getSelectionDAGInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:40
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:72
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
use_iterator use_begin()
Definition: Value.h:338
bool use_empty() const
Definition: Value.h:322
iterator_range< use_iterator > uses()
Definition: Value.h:354
#define INT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:38
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:467
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:440
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:41
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:909
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:377
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:437
@ ConstantFP
Definition: ISDOpcodes.h:60
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:222
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:330
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:628
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:642
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:495
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
@ GlobalAddress
Definition: ISDOpcodes.h:61
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:502
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:369
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:903
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:287
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:417
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:532
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:909
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:605
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:595
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:913
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:489
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:399
@ VECREDUCE_FADD
These reductions are non-strict, and have a single vector operand.
Definition: ISDOpcodes.h:901
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:440
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:903
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:241
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:475
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:610
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:678
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:253
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:272
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:444
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:177
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:909
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:183
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:174
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:250
@ FP_ROUND_INREG
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:577
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:907
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:404
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:434
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:391
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the ...
Definition: ISDOpcodes.h:382
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:628
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:44
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:363
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:169
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:492
@ TargetConstantFP
Definition: ISDOpcodes.h:125
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:459
@ VECREDUCE_XOR
Definition: ISDOpcodes.h:908
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:622
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:256
@ VECREDUCE_AND
Definition: ISDOpcodes.h:908
@ TargetFrameIndex
Definition: ISDOpcodes.h:132
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:521
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:510
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:408
@ LIFETIME_START
This corresponds to the llvm.lifetime.* intrinsics.
Definition: ISDOpcodes.h:877
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:580
@ VECREDUCE_OR
Definition: ISDOpcodes.h:908
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:453
@ VECREDUCE_MUL
Definition: ISDOpcodes.h:907
@ LIFETIME_END
Definition: ISDOpcodes.h:877
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:781
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:909
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:633
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:548
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:124
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:411
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:213
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:901
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:231
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:356
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:49
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:562
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:543
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:498
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:672
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:56
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:336
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:264
@ AssertZext
Definition: ISDOpcodes.h:56
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable,...
Definition: ISDOpcodes.h:351
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant BUI...
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1028
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:950
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:995
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:970
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
@ VecLoad
Definition: NVPTX.h:67
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:644
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1192
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1966
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:392
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1968
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:342
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:888
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 maximum semantics.
Definition: APFloat.h:1272
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:433
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:544
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:663
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1199
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1248
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:344
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:188
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1122
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:119
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:158
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type cast(const Y &Val)
Definition: Casting.h:249
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
bool isNullOrNullSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
@ Z
zlib style compression
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1237
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:614
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1266
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1251
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1229
bool isAllOnesOrAllOnesSplat(SDValue V)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 minimum semantics.
Definition: APFloat.h:1259
bool isOneOrOneSplat(SDValue V)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:341
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:190
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:195
Extended Value Type.
Definition: ValueTypes.h:33
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:95
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:72
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:228
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:240
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:135
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition: ValueTypes.h:358
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:63
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:57
unsigned getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:309
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:259
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:234
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:222
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:217
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:216
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:130
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:297
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:145
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:272
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:246
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:39
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:156
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
RetVal visit(const SCEV *S)
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const
bool hasNoNaNs() const
bool hasNoSignedZeros() const
bool hasApproximateFuncs() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
bool hasVectorReduction() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:289
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, const T &arg, const Ts &...args)
Recursive, variadic combining method.
Definition: Hashing.h:553